diff --git a/8224675-Late-GC-barrier-insertion-for-ZGC.patch b/8224675-Late-GC-barrier-insertion-for-ZGC.patch index 95dc4c9534025966ef3685ce9256373b8bcb7e46..50e15d1eb5affe295e471492566ff76974cabbb4 100644 --- a/8224675-Late-GC-barrier-insertion-for-ZGC.patch +++ b/8224675-Late-GC-barrier-insertion-for-ZGC.patch @@ -2812,7 +2812,7 @@ index 5454d1350..d7eb3996b 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -3017,9 +3018,7 @@ void PhaseIdealLoop::build_and_optimize() { - build_loop_late( visited, worklist, nstack ); + if (C->failing()) { return; } if (_verify_only) { - // restore major progress flag diff --git a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch b/Add-riscv64-support.patch similarity index 64% rename from 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch rename to Add-riscv64-support.patch index 13815b71215d789e75976982277c6f8b05762627..59017ae02c5b6c185a441f428acd08dfc203eb6e 100644 --- a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +++ b/Add-riscv64-support.patch @@ -1,228 +1,198 @@ -From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001 -From: misaka00251 -Date: Wed, 9 Aug 2023 02:24:23 +0800 -Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) +From dfa792539047c39d0d25244265bc8368163d5768 Mon Sep 17 00:00:00 2001 +From: Fei Yang +Date: Thu, 24 Mar 2022 09:22:46 +0000 +Subject: [PATCH 001/140] Cherry-picked JDK-8276799: initial load of RISC-V + backend (cannot pass compilation) --- - make/autoconf/build-aux/config.sub | 7 + + make/autoconf/build-aux/config.guess | 2 +- make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 4 +- - make/autoconf/platform.m4 | 10 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 16 +- - src/hotspot/cpu/aarch64/aarch64.ad | 40 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +- - .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 + - .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 + - src/hotspot/cpu/arm/arm.ad | 10 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +- - src/hotspot/cpu/ppc/ppc.ad | 16 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 +++ + make/autoconf/libraries.m4 | 8 +- + make/autoconf/platform.m4 | 6 +- + make/hotspot/gensrc/GensrcAdlc.gmk | 9 +- + .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- + src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 +- + src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 8 +- + .../cpu/riscv/abstractInterpreter_riscv.cpp | 177 + + src/hotspot/cpu/riscv/assembler_riscv.cpp | 372 + + src/hotspot/cpu/riscv/assembler_riscv.hpp | 3047 +++++ .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++ + src/hotspot/cpu/riscv/bytes_riscv.hpp | 167 + + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 353 + 
+ src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 84 + + .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 30 + + .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 32 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 388 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 148 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 281 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 37 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 388 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 52 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2267 ++++ .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++ + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1075 ++ src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 + + src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 83 + + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 432 + + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 120 + + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1172 ++ + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 65 + + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1646 +++ + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 193 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 83 + src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + + .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 + src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 60 + - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 683 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 200 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 149 + + src/hotspot/cpu/riscv/copy_riscv.hpp | 136 + + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 58 + + .../cpu/riscv/foreign_globals_riscv.cpp | 44 + + .../cpu/riscv/foreign_globals_riscv.hpp | 32 + + src/hotspot/cpu/riscv/frame_riscv.cpp | 697 + + src/hotspot/cpu/riscv/frame_riscv.hpp | 202 + + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 248 + + .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 484 + .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 226 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 75 + - .../cardTableBarrierSetAssembler_riscv.cpp | 120 + - .../cardTableBarrierSetAssembler_riscv.hpp | 43 + - .../modRefBarrierSetAssembler_riscv.cpp | 54 + + .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 31 + + .../gc/shared/barrierSetAssembler_riscv.cpp | 302 + + .../gc/shared/barrierSetAssembler_riscv.hpp | 79 + + .../gc/shared/barrierSetNMethod_riscv.cpp | 171 + + .../cardTableBarrierSetAssembler_riscv.cpp | 111 + + .../cardTableBarrierSetAssembler_riscv.hpp | 42 + + .../modRefBarrierSetAssembler_riscv.cpp | 55 + .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 92 + - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 44 + - 
src/hotspot/cpu/riscv/globals_riscv.hpp | 120 + + .../c1/shenandoahBarrierSetC1_riscv.cpp | 117 + + .../shenandoahBarrierSetAssembler_riscv.cpp | 712 ++ + .../shenandoahBarrierSetAssembler_riscv.hpp | 88 + + .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 285 + + .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 + + .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 + + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 + + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 + + src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 + + .../cpu/riscv/globalDefinitions_riscv.hpp | 52 + + src/hotspot/cpu/riscv/globals_riscv.hpp | 99 + src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 61 + + src/hotspot/cpu/riscv/icache_riscv.cpp | 51 + src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 + + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1940 +++ + src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 285 + + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 295 + src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 193 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 + - .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 + - .../cpu/riscv/register_definitions_riscv.cpp | 193 + - src/hotspot/cpu/riscv/register_riscv.cpp | 69 + - src/hotspot/cpu/riscv/register_riscv.hpp | 337 + + .../cpu/riscv/javaFrameAnchor_riscv.hpp | 86 + + .../cpu/riscv/jniFastGetField_riscv.cpp | 214 + + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 106 + + .../cpu/riscv/macroAssembler_riscv.cpp | 4016 ++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 858 ++ + .../cpu/riscv/macroAssembler_riscv.inline.hpp | 31 + + src/hotspot/cpu/riscv/matcher_riscv.hpp | 169 + + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 461 + + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 57 + + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 429 + + src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 572 + + src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 + + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 43 + + src/hotspot/cpu/riscv/register_riscv.cpp | 73 + + src/hotspot/cpu/riscv/register_riscv.hpp | 324 + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + - src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 + - src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 605 + - src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 + - .../templateInterpreterGenerator_riscv.cpp | 1841 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++ + src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 44 + + src/hotspot/cpu/riscv/riscv.ad | 10611 ++++++++++++++++ + src/hotspot/cpu/riscv/riscv_b.ad | 527 + + src/hotspot/cpu/riscv/riscv_v.ad | 2065 +++ + 
src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2761 ++++ + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3864 ++++++ + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 58 + + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 161 + + .../templateInterpreterGenerator_riscv.cpp | 1794 +++ + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 3951 ++++++ src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 + - .../cpu/riscv/vm_version_ext_riscv.cpp | 91 + - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 + + .../riscv/universalNativeInvoker_riscv.cpp | 33 + + .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 + + src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 42 + + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 230 + + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 72 + + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 64 + + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 68 + + src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 46 + src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +- - src/hotspot/cpu/s390/s390.ad | 16 +- - src/hotspot/cpu/sparc/sparc.ad | 10 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +- - src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 + - src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + - src/hotspot/cpu/x86/x86.ad | 14 +- - src/hotspot/cpu/x86/x86_32.ad | 19 +- - src/hotspot/cpu/x86/x86_64.ad | 24 +- - src/hotspot/os/linux/os_linux.cpp | 11 +- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 + - .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 + - .../linux_riscv/copy_linux_riscv.inline.hpp | 116 + + src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 9 +- + src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- + src/hotspot/os/linux/os_linux.cpp | 2 + + .../linux_riscv/assembler_linux_riscv.cpp | 26 + + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 134 + + .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 45 + + .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 + + .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 + .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 73 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 + + .../linux_riscv/orderAccess_linux_riscv.hpp | 63 + + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 466 + + .../os_cpu/linux_riscv/os_linux_riscv.hpp | 59 + .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 + + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 92 + + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 48 + .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - .../linux_riscv/vm_version_linux_riscv.cpp | 116 + - src/hotspot/share/adlc/archDesc.cpp | 5 + - src/hotspot/share/adlc/formssel.cpp | 2 + - src/hotspot/share/c1/c1_LIR.cpp | 113 +- - src/hotspot/share/c1/c1_LIR.hpp | 208 +- + .../linux_riscv/vm_version_linux_riscv.cpp | 118 + + src/hotspot/share/c1/c1_LIR.cpp | 112 +- + src/hotspot/share/c1/c1_LIR.hpp | 209 +- src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 14 +- - src/hotspot/share/classfile/vmSymbols.cpp | 2 + - 
src/hotspot/share/classfile/vmSymbols.hpp | 1 + - .../gc/shenandoah/shenandoahArguments.cpp | 2 +- + src/hotspot/share/c1/c1_LIRAssembler.hpp | 5 +- + src/hotspot/share/c1/c1_LinearScan.cpp | 18 +- + .../gc/shenandoah/shenandoahArguments.cpp | 4 +- + src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 4 +- .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/c2compiler.cpp | 1 + - src/hotspot/share/opto/chaitin.cpp | 90 +- - src/hotspot/share/opto/chaitin.hpp | 32 +- - src/hotspot/share/opto/intrinsicnode.hpp | 5 +- - src/hotspot/share/opto/library_call.cpp | 13 +- - src/hotspot/share/opto/machnode.cpp | 2 +- - src/hotspot/share/opto/machnode.hpp | 4 + - src/hotspot/share/opto/matcher.cpp | 41 +- - src/hotspot/share/opto/matcher.hpp | 6 +- - src/hotspot/share/opto/node.cpp | 21 + - src/hotspot/share/opto/node.hpp | 5 + - src/hotspot/share/opto/opcodes.cpp | 4 +- - src/hotspot/share/opto/opcodes.hpp | 2 + - src/hotspot/share/opto/phase.cpp | 2 + - src/hotspot/share/opto/phase.hpp | 1 + - src/hotspot/share/opto/postaloc.cpp | 53 +- - src/hotspot/share/opto/regmask.cpp | 46 +- - src/hotspot/share/opto/regmask.hpp | 10 +- - src/hotspot/share/opto/superword.cpp | 7 +- - src/hotspot/share/opto/type.cpp | 14 +- - src/hotspot/share/opto/type.hpp | 12 +- - src/hotspot/share/opto/vectornode.cpp | 4 +- - .../share/runtime/abstract_vm_version.cpp | 12 +- + src/hotspot/share/opto/regmask.hpp | 2 +- + .../share/runtime/abstract_vm_version.cpp | 3 +- + src/hotspot/share/runtime/synchronizer.cpp | 2 +- src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 2 +- - src/hotspot/share/utilities/debug.cpp | 1 + + src/hotspot/share/runtime/thread.inline.hpp | 4 +- src/hotspot/share/utilities/macros.hpp | 26 + - .../share/classes/java/lang/StringLatin1.java | 5 + .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 2 + - .../linux/native/libsaproc/ps_proc.c | 4 + - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 + + .../linux/native/libsaproc/libproc.h | 4 +- + .../classes/sun/jvm/hotspot/HotSpotAgent.java | 3 + .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 11 +- + .../debugger/linux/LinuxCDebugger.java | 13 +- .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../debugger/proc/ProcDebuggerLocal.java | 6 + .../proc/riscv64/ProcRISCV64Thread.java | 88 + .../riscv64/ProcRISCV64ThreadContext.java | 48 + .../riscv64/ProcRISCV64ThreadFactory.java | 46 + .../remote/riscv64/RemoteRISCV64Thread.java | 55 + .../riscv64/RemoteRISCV64ThreadContext.java | 48 + .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../riscv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 3 + - .../LinuxRISCV64JavaThreadPDAccess.java | 132 + + .../debugger/risv64/RISCV64ThreadContext.java | 172 + + .../sun/jvm/hotspot/runtime/Threads.java | 5 +- + .../LinuxRISCV64JavaThreadPDAccess.java | 134 + .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 + - .../riscv64/RISCV64JavaCallWrapper.java | 58 + + .../hotspot/runtime/riscv64/RISCV64Frame.java | 556 + + .../riscv64/RISCV64JavaCallWrapper.java | 61 + .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- - src/utils/hsdis/hsdis.c | 6 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 + - 
...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 + - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 + - .../testcases/GenericTestCaseForOtherCPU.java | 10 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 + - .../string/TestStringLatin1IndexOfChar.java | 153 + - .../loopopts/superword/ProdRed_Double.java | 2 +- - .../loopopts/superword/ProdRed_Float.java | 2 +- - .../loopopts/superword/ProdRed_Int.java | 2 +- - .../loopopts/superword/ReductionPerf.java | 2 +- - .../superword/SumRedAbsNeg_Double.java | 2 +- - .../superword/SumRedAbsNeg_Float.java | 2 +- - .../loopopts/superword/SumRedSqrt_Double.java | 2 +- - .../loopopts/superword/SumRed_Double.java | 2 +- - .../loopopts/superword/SumRed_Float.java | 2 +- - .../loopopts/superword/SumRed_Int.java | 2 +- - .../argumentcorruption/CheckLongArgs.java | 2 +- - .../criticalnatives/lookup/LookUp.java | 2 +- - .../sha/predicate/IntrinsicPredicates.java | 9 +- - .../NMT/CheckForProperDetailStackTrace.java | 3 +- - .../ReservedStack/ReservedStackTest.java | 3 +- - test/hotspot/jtreg/test_env.sh | 5 + - ...stMutuallyExclusivePlatformPredicates.java | 3 +- - .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 5 +- - test/lib/jdk/test/lib/Platform.java | 5 + - .../bench/java/lang/StringIndexOfChar.java | 221 + - 218 files changed, 57653 insertions(+), 221 deletions(-) + .../jvm/hotspot/utilities/PlatformInfo.java | 4 +- + test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- + ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 5 +- + .../testcases/GenericTestCaseForOtherCPU.java | 11 +- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 115 + + .../loopopts/superword/ProdRed_Double.java | 4 +- + .../loopopts/superword/ProdRed_Float.java | 4 +- + .../loopopts/superword/ProdRed_Int.java | 4 +- + .../loopopts/superword/ReductionPerf.java | 4 +- + .../superword/SumRedAbsNeg_Double.java | 4 +- + .../superword/SumRedAbsNeg_Float.java | 4 +- + .../loopopts/superword/SumRedSqrt_Double.java | 4 +- + .../loopopts/superword/SumRed_Double.java | 4 +- + .../loopopts/superword/SumRed_Float.java | 4 +- + .../loopopts/superword/SumRed_Int.java | 4 +- + .../sha/predicate/IntrinsicPredicates.java | 11 +- + .../NMT/CheckForProperDetailStackTrace.java | 4 +- + .../ReservedStack/ReservedStackTest.java | 4 +- + .../HeapMonitorEventsForTwoThreadsTest.java | 1 - + ...stMutuallyExclusivePlatformPredicates.java | 2 +- + .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- + test/lib/jdk/test/lib/Platform.java | 4 + + 187 files changed, 59079 insertions(+), 189 deletions(-) create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -248,20 +218,26 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp + create 
mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp @@ -270,6 +246,11 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad + create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -285,12 +266,13 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -305,18 +287,20 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp create mode 100644 
src/hotspot/cpu/riscv/templateTable_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -335,101 +319,95 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java - create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java - -diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub -index 3c280ac7c..eda408e01 100644 ---- a/make/autoconf/build-aux/config.sub -+++ b/make/autoconf/build-aux/config.sub -@@ -48,6 +48,13 @@ if ! 
echo $* | grep '^aarch64-' >/dev/null ; then - exit - fi - -+# Canonicalize for riscv which autoconf-config.sub doesn't handle -+if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then -+ result=`echo $@ | sed 's/linux/unknown-linux/'` -+ echo $result -+ exit -+fi -+ - while test $# -gt 0 ; do - case $1 in - -- ) # Stop option processing + +diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess +index a88a9adec3f..15111d827ab 100644 +--- a/make/autoconf/build-aux/config.guess ++++ b/make/autoconf/build-aux/config.guess +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2..01ef26c10 100644 +index 9bb34363e5c..f84e8f84c60 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 -@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], AC_MSG_CHECKING([if shenandoah can be built]) if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ -+ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc..c01fdbcce 100644 +index 16e906bdc6a..5c49fd9285d 100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 -@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - GLOBAL_LIBS="" - fi - -- BASIC_JDKLIB_LIBS="" -+ BASIC_JDKLIB_LIBS="-latomic" - if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then - BASIC_JDKLIB_LIBS="-ljava -ljvm" - fi -@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - wsock32.lib winmm.lib version.lib psapi.lib" +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -130,6 +130,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" fi -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ - JDKLIB_LIBS="$BASIC_JDKLIB_LIBS" - JDKEXE_LIBS="" - JVM_LIBS="$BASIC_JVM_LIBS" ++ # Because RISC-V only has word-sized atomics, it requries libatomic where ++ # other common architectures do not. So link libatomic by default. 
++ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then ++ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" ++ fi ++ + # perfstat lib + if test "x$OPENJDK_TARGET_OS" = xaix; then + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5f..48d615992 100644 +index 26a58eb2ee8..67972d89248 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 -@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], - VAR_CPU_BITS=64 - VAR_CPU_ENDIAN=little - ;; -+ riscv32) -+ VAR_CPU=riscv32 -+ VAR_CPU_ARCH=riscv -+ VAR_CPU_BITS=32 -+ VAR_CPU_ENDIAN=little -+ ;; - riscv64) - VAR_CPU=riscv64 - VAR_CPU_ARCH=riscv -@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -554,6 +554,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xriscv64; then ++ HOTSPOT_$1_CPU_DEFINE=RISCV64 + + # The cpu defines below are for zero, we don't support them directly. + elif test "x$OPENJDK_$1_CPU" = xsparc; then +@@ -564,8 +566,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=S390 elif test "x$OPENJDK_$1_CPU" = xs390x; then HOTSPOT_$1_CPU_DEFINE=S390 -+ elif test "x$OPENJDK_$1_CPU" = xriscv32; then -+ HOTSPOT_$1_CPU_DEFINE=RISCV32 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then +- elif test "x$OPENJDK_$1_CPU" = xriscv64; then - HOTSPOT_$1_CPU_DEFINE=RISCV -+ HOTSPOT_$1_CPU_DEFINE=RISCV64 + elif test "x$OPENJDK_$1_CPU" = xloongarch64; then + HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 elif test "x$OPENJDK_$1_CPU" != x; then - HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) - fi diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac572..9de6f663c 100644 +index c5a3ac5724b..67f4c6f0574 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -439,17 +417,10 @@ index c5a3ac572..9de6f663c 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) -+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ -+ ))) -+ endif -+ + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ @@ -460,95 +431,17 @@ index c5a3ac572..9de6f663c 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 1e4ee33a9..ac5d56f0f 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return size; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg. 
- const uint Matcher::vector_ideal_reg(int len) { - switch(len) { -@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, - ins_pipe(pipe_class_memory); - %} - --instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); - -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} - - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - ins_pipe(pipe_class_memory); - %} - -+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); -+ -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ - instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, - iRegI_R0 result, rFlagsReg cr) - %{ diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3..1a35be210 100644 +index fdd2c0ca3d7..63f193de86e 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } @@ -556,131 +449,21 @@ index fdd2c0ca3..1a35be210 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); Assembler::Condition acond, ncond; switch (condition) { -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 5753cc9a6..21c6fdf19 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, - BIND(DONE); - } - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3) -+{ -+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; -+ Register cnt1_neg = cnt1; -+ Register ch1 = rscratch1; -+ Register result_tmp = rscratch2; -+ -+ cbz(cnt1, NOMATCH); -+ -+ cmp(cnt1, (u1)8); -+ br(LT, DO1_SHORT); -+ -+ orr(ch, ch, ch, LSL, 8); -+ orr(ch, ch, ch, LSL, 16); -+ orr(ch, ch, ch, LSL, 32); -+ -+ sub(cnt1, cnt1, 8); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ -+ mov(tmp3, 0x0101010101010101); -+ -+ BIND(CH1_LOOP); -+ ldr(ch1, Address(str1, cnt1_neg)); -+ eor(ch1, ch, ch1); -+ sub(tmp1, ch1, tmp3); -+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); -+ bics(tmp1, tmp1, tmp2); -+ br(NE, HAS_ZERO); -+ adds(cnt1_neg, cnt1_neg, 8); -+ br(LT, CH1_LOOP); -+ -+ cmp(cnt1_neg, (u1)8); -+ mov(cnt1_neg, 0); -+ br(LT, CH1_LOOP); -+ b(NOMATCH); -+ -+ BIND(HAS_ZERO); -+ rev(tmp1, tmp1); -+ clz(tmp1, tmp1); -+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); -+ b(MATCH); -+ -+ BIND(DO1_SHORT); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ BIND(DO1_LOOP); -+ ldrb(ch1, Address(str1, cnt1_neg)); -+ cmp(ch, ch1); -+ br(EQ, MATCH); -+ adds(cnt1_neg, cnt1_neg, 1); -+ br(LT, DO1_LOOP); -+ BIND(NOMATCH); -+ mov(result, -1); -+ b(DONE); -+ BIND(MATCH); -+ add(result, result_tmp, cnt1_neg); -+ BIND(DONE); -+} -+ - // Compare strings. 
- void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7e23c16a4..c3d472a9a 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -1260,6 +1260,9 @@ public: - void string_indexof_char(Register str1, Register cnt1, - Register ch, Register result, - Register tmp1, Register tmp2, Register tmp3); -+ void stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3); - void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, - FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, - FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, -diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 51f2d9ce7..71f83521e 100644 ---- a/src/hotspot/cpu/arm/arm.ad -+++ b/src/hotspot/cpu/arm/arm.ad -@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { - return MaxVectorSize; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa..2d06d3d58 100644 +index f0a7229aa18..cb095052534 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } @@ -688,75 +471,44 @@ index f0a7229aa..2d06d3d58 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); + AsmCondition acond = al; AsmCondition ncond = nv; if (opr1 != opr2) { diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d..d081116be 100644 +index 847f7d61d2f..d74db914331 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2012, 2019, SAP SE. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1553,8 +1553,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { + } } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc"); -+ ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { load_to_reg(this, opr1, result); // Condition doesn't matter. return; -diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index ebbe80a26..df66a46dc 100644 ---- a/src/hotspot/cpu/ppc/ppc.ad -+++ b/src/hotspot/cpu/ppc/ppc.ad -@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // PPC implementation uses VSX load/store instructions (if - // SuperwordUseVSX) which support 4 byte but not arbitrary alignment - const bool Matcher::misaligned_vectors_ok() { diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 000000000..5661b7425 +index 00000000000..31c63abe71d --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp -@@ -0,0 +1,185 @@ +@@ -0,0 +1,177 @@ +/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -784,13 +536,13 @@ index 000000000..5661b7425 +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" ++#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + -+ +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { @@ -880,7 +632,6 @@ index 000000000..5661b7425 + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state -+ + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * @@ -894,14 +645,6 @@ index 000000000..5661b7425 + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp -+ // -+ // The interpreted method entry on riscv aligns SP to 16 bytes -+ // before generating the fixed part of the activation frame. So there -+ // may be a gap between the locals block and the saved sender SP. For -+ // an interpreted caller we need to recreate this gap and exactly -+ // align the incoming parameters with the caller's temporary -+ // expression stack. For other types of caller frame it doesn't -+ // matter. 
+ intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -935,6 +678,7 @@ index 000000000..5661b7425 + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } ++ + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = @@ -942,14 +686,14 @@ index 000000000..5661b7425 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 000000000..40ecf1a6c +index 00000000000..f15ef5304c5 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -0,0 +1,365 @@ +@@ -0,0 +1,372 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -969,6 +713,7 @@ index 000000000..40ecf1a6c + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + +#include @@ -983,8 +728,6 @@ index 000000000..40ecf1a6c +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + -+#define __ _masm. -+ +int AbstractAssembler::code_fill_byte() { + return 0; +} @@ -999,7 +742,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { ++void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { @@ -1019,7 +762,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { ++void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { @@ -1033,11 +776,12 @@ index 000000000..40ecf1a6c + add_uw(Rd, Rs, zr); +} + -+void Assembler::li(Register Rd, int64_t imm) { ++void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; -+ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. ++ // Split imm to a lower 12-bit sign-extended part and the remainder, ++ // because addi will sign-extend the lower imm. + lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + @@ -1051,8 +795,7 @@ index 000000000..40ecf1a6c + if (lower != 0) { + addi(Rd, Rd, lower); + } -+ } -+ else { ++ } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { @@ -1066,30 +809,30 @@ index 000000000..40ecf1a6c +} + +void Assembler::li64(Register Rd, int64_t imm) { -+ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), -+ // upper = imm[63:32] + 1. 
-+ int64_t lower = imm & 0xffffffff; -+ lower -= ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ -+ // Load upper 32 bits -+ int64_t up = upper, lo = upper; -+ lo = (lo << 52) >> 52; -+ up -= lo; -+ up = (int32_t)up; -+ lui(Rd, up); -+ addi(Rd, Rd, lo); -+ -+ // Load the rest 32 bits. -+ slli(Rd, Rd, 12); -+ addi(Rd, Rd, (int32_t)lower >> 20); -+ slli(Rd, Rd, 12); -+ lower = ((int32_t)imm << 12) >> 20; -+ addi(Rd, Rd, lower); -+ slli(Rd, Rd, 8); -+ lower = imm & 0xff; -+ addi(Rd, Rd, lower); ++ // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or ++ // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. ++ int64_t lower = imm & 0xffffffff; ++ lower -= ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ ++ // Load upper 32 bits ++ int64_t up = upper, lo = upper; ++ lo = (lo << 52) >> 52; ++ up -= lo; ++ up = (int32_t)up; ++ lui(Rd, up); ++ addi(Rd, Rd, lo); ++ ++ // Load the rest 32 bits. ++ slli(Rd, Rd, 12); ++ addi(Rd, Rd, (int32_t)lower >> 20); ++ slli(Rd, Rd, 12); ++ lower = ((int32_t)imm << 12) >> 20; ++ addi(Rd, Rd, lower); ++ slli(Rd, Rd, 8); ++ lower = imm & 0xff; ++ addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { @@ -1162,15 +905,16 @@ index 000000000..40ecf1a6c + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ -+ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \ -+ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \ ++ int32_t offset = 0; \ ++ baseOffset(temp, adr, offset); \ ++ jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ @@ -1230,9 +974,9 @@ index 000000000..40ecf1a6c + } +#endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -+ "bit 47 overflows in address constant"); -+ // Load upper 31 bits -+ int32_t imm = imm64 >> 17; ++ "48-bit overflow in address constant"); ++ // Load upper 32 bits ++ int32_t imm = imm64 >> 16; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; @@ -1240,13 +984,13 @@ index 000000000..40ecf1a6c + lui(Rd, upper); + addi(Rd, Rd, lower); + -+ // Load the rest 17 bits. ++ // Load the rest 16 bits. + slli(Rd, Rd, 11); -+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); -+ slli(Rd, Rd, 6); ++ addi(Rd, Rd, (imm64 >> 5) & 0x7ff); ++ slli(Rd, Rd, 5); + -+ // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. -+ offset = imm64 & 0x3f; ++ // This offset will be used by following jalr/ld. 
++ offset = imm64 & 0x1f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { @@ -1259,6 +1003,13 @@ index 000000000..40ecf1a6c + addi(Rd, Rd, offset); +} + ++void Assembler::ifence() { ++ fence_i(); ++ if (UseConservativeFence) { ++ fence(ir, ir); ++ } ++} ++ +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ @@ -1313,14 +1064,14 @@ index 000000000..40ecf1a6c +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 000000000..d4da30ed6 +index 00000000000..4923962a496 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,2004 @@ +@@ -0,0 +1,3047 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -1348,6 +1099,7 @@ index 000000000..d4da30ed6 + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" ++#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + @@ -1359,10 +1111,10 @@ index 000000000..d4da30ed6 +class Argument { + public: + enum { -+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) -+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) ++ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + -+ n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) ++ n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; @@ -1386,7 +1138,21 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + -+// java function register(caller-save registers) ++// Symbolically name the register arguments used by the Java calling convention. ++// We have control over the convention for java so we can do what we please. ++// What pleases us is to offset the java calling convention so that when ++// we call a suitable jni method the arguments are lined up and we don't ++// have to do much shuffling. A suitable jni method is non-static and a ++// small number of arguments. 
++// ++// |------------------------------------------------------------------------| ++// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | ++// |------------------------------------------------------------------------| ++// | x10 x11 x12 x13 x14 x15 x16 x17 | ++// |------------------------------------------------------------------------| ++// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | ++// |------------------------------------------------------------------------| ++ +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -1396,6 +1162,8 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + ++// Java floating args are passed as per C ++ +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -1412,6 +1180,9 @@ index 000000000..d4da30ed6 +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + ++// registers used to hold VM data either temporarily within a method ++// or across method calls ++ +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered register @@ -1434,9 +1205,6 @@ index 000000000..d4da30ed6 +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + -+/* If you use x4(tp) as java thread pointer according to the instruction manual, -+ * it overlaps with the register used by c++ thread. -+ */ +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer @@ -1446,13 +1214,13 @@ index 000000000..d4da30ed6 +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + -+// tempory register(caller-save registers) ++// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -1469,6 +1237,7 @@ index 000000000..d4da30ed6 + + private: + Register _base; ++ Register _index; + int64_t _offset; + enum mode _mode; + @@ -1481,46 +1250,40 @@ index 000000000..d4da30ed6 + + public: + Address() -+ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { } ++ : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) -+ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+#ifdef ASSERT ++ : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } ++ ++ template::value)> ++ 
Address(Register r, T o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} ++ + Address(Register r, ByteSize disp) -+ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } -+#endif ++ : Address(r, in_bytes(disp)) {} + Address(address target, RelocationHolder const& rspec) + : _base(noreg), ++ _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), -+ _target(target) { } ++ _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { -+ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode"); ++ guarantee((_mode == base_plus_offset | _mode == pcrel | _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } -+ ++ Register index() const { ++ return _index; ++ } + mode getMode() const { + return _mode; + } + -+ bool uses(Register reg) const { return _base == reg;} ++ bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { @@ -1575,6 +1338,14 @@ index 000000000..d4da30ed6 + + enum { instruction_size = 4 }; + ++ //---< calculate length of instruction >--- ++ // We just use the values set above. ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return instruction_size; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return instruction_size; } ++ + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero @@ -1584,34 +1355,41 @@ index 000000000..d4da30ed6 + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. 
+ }; + -+ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ assert_different_registers(noreg, temp, base); -+ int64_t upper = offset, lower = offset; -+ -+ int8_t shift = 64 - expect_offbits; -+ lower = (offset << shift) >> shift; -+ upper -= lower; -+ -+ li(temp, upper); -+ add(temp, temp, base); -+ return Address(temp, lower); ++ void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ guarantee(Rd != adr.base(), "should use different registers!"); ++ if (is_offset_in_range(adr.offset(), 32)) { ++ int32_t imm = adr.offset(); ++ int32_t upper = imm, lower = imm; ++ lower = (imm << 20) >> 20; ++ upper -= lower; ++ lui(Rd, upper); ++ offset = lower; ++ } else { ++ movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); ++ } ++ add(Rd, Rd, adr.base()); + } + -+ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ if (is_offset_in_range(offset, expect_offbits)) { -+ return Address(base, offset); ++ void baseOffset(Register Rd, const Address &adr, int32_t &offset) { ++ if (is_offset_in_range(adr.offset(), 12)) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ addi(Rd, adr.base(), adr.offset()); ++ offset = 0; ++ } else { ++ baseOffset32(Rd, adr, offset); + } -+ return form_address_complex(base, offset, expect_offbits, temp); + } + -+ void li(Register Rd, int64_t imm); // optimized load immediate ++ void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); ++ void ifence(); + void j(const address &dest, Register temp = t0); -+ void j(const Address &adr, Register temp = t0) ; ++ void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); @@ -1633,7 +1411,7 @@ index 000000000..d4da30ed6 + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ uint32_t mask = checked_cast(right_n_bits(nbits)); ++ uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; @@ -1650,8 +1428,8 @@ index 000000000..d4da30ed6 + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ guarantee(val < (1ULL << nbits), "Field too big for insn"); -+ unsigned mask = checked_cast(right_n_bits(nbits)); ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; @@ -1680,11 +1458,11 @@ index 000000000..d4da30ed6 + emit_int32((jint)insn); + } + -+ void halt() { ++ void _halt() { + emit_int32(0); + } + -+// Rigster Instruction ++// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ @@ -1697,18 +1475,18 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(add, 0b0110011, 0b000, 0b0000000); -+ INSN(sub, 0b0110011, 0b000, 0b0100000); -+ INSN(andr, 0b0110011, 0b111, 0b0000000); -+ INSN(orr, 0b0110011, 0b110, 0b0000000); -+ INSN(xorr, 0b0110011, 0b100, 0b0000000); ++ INSN(_add, 0b0110011, 0b000, 
0b0000000); ++ INSN(_sub, 0b0110011, 0b000, 0b0100000); ++ INSN(_andr, 0b0110011, 0b111, 0b0000000); ++ INSN(_orr, 0b0110011, 0b110, 0b0000000); ++ INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); -+ INSN(addw, 0b0111011, 0b000, 0b0000000); -+ INSN(subw, 0b0111011, 0b000, 0b0100000); ++ INSN(_addw, 0b0111011, 0b000, 0b0000000); ++ INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -1726,22 +1504,20 @@ index 000000000..d4da30ed6 + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + -+ // Vector Configuration Instruction -+ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); -+ +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ ++ InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ -+ code_section()->relocate(pc(), InternalAddress(dest).rspec()); ++ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1749,7 +1525,19 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(lb, 0b0000011, 0b000); ++ INSN(lbu, 0b0000011, 0b100); ++ INSN(lh, 0b0000011, 0b001); ++ INSN(lhu, 0b0000011, 0b101); ++ INSN(_lw, 0b0000011, 0b010); ++ INSN(lwu, 0b0000011, 0b110); ++ INSN(_ld, 0b0000011, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1766,7 +1554,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ @@ -1776,7 +1564,14 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? 
temp : Rd)); \ ++ int32_t offset = 0; \ ++ if (Rd == adr.base()) { \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ ++ } else { \ ++ baseOffset32(Rd, adr, offset); \ ++ NAME(Rd, Rd, offset); \ ++ } \ + } \ + break; \ + } \ @@ -1788,20 +1583,20 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, &Assembler::NAME); \ + } + -+ INSN(lb, 0b0000011, 0b000); -+ INSN(lbu, 0b0000011, 0b100); -+ INSN(ld, 0b0000011, 0b011); -+ INSN(lh, 0b0000011, 0b001); -+ INSN(lhu, 0b0000011, 0b101); -+ INSN(lw, 0b0000011, 0b010); -+ INSN(lwu, 0b0000011, 0b110); ++ INSN(lb); ++ INSN(lbu); ++ INSN(lh); ++ INSN(lhu); ++ INSN(lw); ++ INSN(lwu); ++ INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1809,7 +1604,14 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(flw, 0b0000111, 0b010); ++ INSN(_fld, 0b0000111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1826,7 +1628,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ @@ -1836,7 +1638,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ + } \ + break; \ + } \ @@ -1845,14 +1649,14 @@ index 000000000..d4da30ed6 + } \ + } + -+ INSN(flw, 0b0000111, 0b010); -+ INSN(fld, 0b0000111, 0b011); ++ INSN(flw); ++ INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ @@ -1867,7 +1671,18 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_beq, 0b1100011, 0b000); ++ INSN(_bne, 0b1100011, 0b001); ++ INSN(bge, 0b1100011, 0b101); ++ INSN(bgeu, 0b1100011, 0b111); ++ INSN(blt, 0b1100011, 0b100); ++ INSN(bltu, 0b1100011, 0b110); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ @@ -1878,12 +1693,12 @@ index 000000000..d4da30ed6 + NAME(Rs1, Rs2, dest); \ + } + -+ INSN(beq, 0b1100011, 0b000); -+ INSN(bge, 0b1100011, 0b101); -+ INSN(bgeu, 0b1100011, 0b111); -+ INSN(blt, 0b1100011, 0b100); -+ INSN(bltu, 0b1100011, 0b110); -+ INSN(bne, 0b1100011, 0b001); ++ INSN(beq); ++ INSN(bne); ++ INSN(bge); ++ INSN(bgeu); ++ INSN(blt); ++ INSN(bltu); + +#undef INSN + @@ -1903,8 +1718,8 @@ index 000000000..d4da30ed6 + +#define 
INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ @@ -1916,16 +1731,27 @@ index 000000000..d4da30ed6 + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ ++ ++ INSN(sb, Register, 0b0100011, 0b000); ++ INSN(sh, Register, 0b0100011, 0b001); ++ INSN(_sw, Register, 0b0100011, 0b010); ++ INSN(_sd, Register, 0b0100011, 0b011); ++ INSN(fsw, FloatRegister, 0b0100111, 0b010); ++ INSN(_fsd, FloatRegister, 0b0100111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + -+ INSN(sb, Register, 0b0100011, 0b000); -+ INSN(sh, Register, 0b0100011, 0b001); -+ INSN(sw, Register, 0b0100011, 0b010); -+ INSN(sd, Register, 0b0100011, 0b011); -+ INSN(fsw, FloatRegister, 0b0100111, 0b010); -+ INSN(fsd, FloatRegister, 0b0100111, 0b011); ++ INSN(sb, Register); ++ INSN(sh, Register); ++ INSN(sw, Register); ++ INSN(sd, Register); ++ INSN(fsw, FloatRegister); ++ INSN(fsd, FloatRegister); + +#undef INSN + @@ -1944,7 +1770,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ @@ -1955,8 +1781,10 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ ++ int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -1986,7 +1814,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ @@ -1996,7 +1824,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -2050,8 +1880,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -2059,7 +1889,13 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_jal, 0b1101111); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ @@ -2077,7 +1913,7 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, temp, &Assembler::NAME); 
\ + } + -+ INSN(jal, 0b1101111); ++ INSN(jal); + +#undef INSN + @@ -2085,8 +1921,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ @@ -2096,7 +1932,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(jalr, 0b1100111, 0b000); ++ INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + @@ -2130,8 +1966,10 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + ++ INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); -+ INSN(ebreak, 0b1110011, 0b000, 0b000000000001); ++ INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); ++ +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -2239,12 +2077,12 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(addi, 0b0010011, 0b000); -+ INSN(slti, 0b0010011, 0b010); -+ INSN(addiw, 0b0011011, 0b000); -+ INSN(and_imm12, 0b0010011, 0b111); -+ INSN(ori, 0b0010011, 0b110); -+ INSN(xori, 0b0010011, 0b100); ++ INSN(_addi, 0b0010011, 0b000); ++ INSN(slti, 0b0010011, 0b010); ++ INSN(_addiw, 0b0011011, 0b000); ++ INSN(_and_imm12, 0b0010011, 0b111); ++ INSN(ori, 0b0010011, 0b110); ++ INSN(xori, 0b0010011, 0b100); + +#undef INSN + @@ -2278,9 +2116,9 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(slli, 0b0010011, 0b001, 0b000000); -+ INSN(srai, 0b0010011, 0b101, 0b010000); -+ INSN(srli, 0b0010011, 0b101, 0b000000); ++ INSN(_slli, 0b0010011, 0b001, 0b000000); ++ INSN(_srai, 0b0010011, 0b101, 0b010000); ++ INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + @@ -2316,7 +2154,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(lui, 0b0110111); ++ INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN @@ -2592,6 +2430,23 @@ index 000000000..d4da30ed6 + +#undef patch_vtype + ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ emit(insn); \ ++ } ++ ++ // Vector Configuration Instruction ++ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); ++ ++#undef INSN ++ +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 @@ -3159,7 +3014,6 @@ index 000000000..d4da30ed6 + +// ==================================== +// RISC-V Bit-Manipulation Extension -+// Currently only support Zba and Zbb. +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3238,7 +3092,7 @@ index 000000000..d4da30ed6 +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ -+ void NAME(Register Rd, Register Rs1, unsigned shamt){ \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ @@ -3251,9 +3105,966 @@ index 000000000..d4da30ed6 + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); -+ ++ ++#undef INSN ++ ++// ======================================== ++// RISC-V Compressed Instructions Extension ++// ======================================== ++// Note: ++// 1. 
When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be ++// transformed to 16-bit instructions if compressible. ++// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', ++// but most of time we have no need to explicitly use these instructions. ++// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range ++// are qualified to be compressed with their 2-byte versions. ++// An example: ++// ++// CompressibleRegion cr(_masm); ++// __ andr(...); // this instruction could change to c.and if able to ++// ++// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from ++// normal ones. ++// ++ ++private: ++ bool _in_compressible_region; ++public: ++ bool in_compressible_region() const { return _in_compressible_region; } ++ void set_in_compressible_region(bool b) { _in_compressible_region = b; } ++public: ++ ++ // a compressible region ++ class CompressibleRegion : public StackObj { ++ protected: ++ Assembler *_masm; ++ bool _saved_in_compressible_region; ++ public: ++ CompressibleRegion(Assembler *_masm) ++ : _masm(_masm) ++ , _saved_in_compressible_region(_masm->in_compressible_region()) { ++ _masm->set_in_compressible_region(true); ++ } ++ ~CompressibleRegion() { ++ _masm->set_in_compressible_region(_saved_in_compressible_region); ++ } ++ }; ++ ++ // patch a 16-bit instruction. ++ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { ++ assert_cond(a != NULL); ++ assert_cond(msb >= lsb && msb <= 15); ++ unsigned nbits = msb - lsb + 1; ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ uint16_t mask = (1U << nbits) - 1; ++ val <<= lsb; ++ mask <<= lsb; ++ uint16_t target = *(uint16_t *)a; ++ target &= ~mask; ++ target |= val; ++ *(uint16_t *)a = target; ++ } ++ ++ static void c_patch(address a, unsigned bit, uint16_t val) { ++ c_patch(a, bit, bit, val); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++// -------------- RVC Instruction Definitions -------------- ++ ++ void c_nop() { ++ c_addi(x0, 0); ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi, 0b000, 0b01); ++ INSN(c_addiw, 0b001, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t imm) { \ ++ 
assert_cond(is_imm_in_range(imm, 10, 0)); \ ++ assert_cond((imm & 0b1111) == 0); \ ++ assert_cond(imm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ ++ c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ ++ c_patch_reg((address)&insn, 7, sp); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi16sp, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(uimm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ ++ c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi4spn, 0b000, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_slli, 0b000, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_srli, 0b100, 0b00, 0b01); ++ INSN(c_srai, 0b100, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_andi, 0b100, 0b10, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct6, funct2, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 6, 5, funct2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 10, funct6); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sub, 0b100011, 
0b00, 0b01); ++ INSN(c_xor, 0b100011, 0b01, 0b01); ++ INSN(c_or, 0b100011, 0b10, 0b01); ++ INSN(c_and, 0b100011, 0b11, 0b01); ++ INSN(c_subw, 0b100111, 0b00, 0b01); ++ INSN(c_addw, 0b100111, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_mv, 0b1000, 0b10); ++ INSN(c_add, 0b1001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rs1) { \ ++ assert_cond(Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, x0); \ ++ c_patch_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_jr, 0b1000, 0b10); ++ INSN(c_jalr, 0b1001, 0b10); ++ ++#undef INSN ++ ++ typedef void (Assembler::* j_c_insn)(address dest); ++ typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); ++ ++ void wrap_label(Label &L, j_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(pc()); ++ } ++ } ++ ++ void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(r, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(r, pc()); ++ } ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t offset) { \ ++ assert_cond(is_imm_in_range(offset, 11, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ ++ c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ ++ c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ ++ c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ ++ c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ ++ c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 11, 1)); \ ++ c_j(distance); \ ++ } \ ++ void NAME(Label &L) { \ ++ wrap_label(L, &Assembler::NAME); \ ++ } ++ ++ INSN(c_j, 0b101, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 8, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ ++ c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(Register Rs1, address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 8, 1)); \ ++ NAME(Rs1, distance); \ ++ } \ ++ void NAME(Register Rs1, 
Label &L) { \ ++ wrap_label(L, Rs1, &Assembler::NAME); \ ++ } ++ ++ INSN(c_beqz, 0b110, 0b01); ++ INSN(c_bnez, 0b111, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 18, 0)); \ ++ assert_cond((imm & 0xfff) == 0); \ ++ assert_cond(imm != 0); \ ++ assert_cond(Rd != x0 && Rd != x2); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lui, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_li, 0b010, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ldsp, 0b011, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(FloatRegister Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_fldsp, 0b001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ld, 0b011, 0b00, Register); ++ INSN(c_sd, 0b111, 0b00, Register); ++ INSN(c_fld, 0b001, 0b00, FloatRegister); ++ INSN(c_fsd, 0b101, 0b00, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ 
c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sdsp, 0b111, 0b10, Register); ++ INSN(c_fsdsp, 0b101, 0b10, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_swsp, 0b110, 0b10); ++ +#undef INSN + ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lwsp, 0b010, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lw, 0b010, 0b00); ++ INSN(c_sw, 0b110, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME() { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 11, 2, 0x0); \ ++ c_patch((address)&insn, 12, 12, 0b1); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ebreak, 0b100, 0b10); ++ ++#undef INSN ++ ++// -------------- RVC Transformation Functions -------------- ++ ++// -------------------------- ++// Register instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* add -> c.add */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ c_add(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ _add(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(add); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* sub/subw -> c.sub/c.subw */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ ++ C_NAME(Rd, Rs2); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(sub, c_sub, _sub); ++ INSN(subw, c_subw, _subw); ++ ++#undef INSN ++ ++// 
-------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ ++ ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ C_NAME(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(andr, c_and, _andr); ++ INSN(orr, c_or, _orr); ++ INSN(xorr, c_xor, _xorr); ++ INSN(addw, c_addw, _addw); ++ ++#undef INSN ++ ++private: ++// some helper functions ++ bool do_compress() const { ++ return UseRVC && in_compressible_region(); ++ } ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0 && \ ++ (!ld || rd_rs2 != x0); \ ++ } \ ++ ++ FUNC(is_c_ldsdsp, 0b111, 9); ++ FUNC(is_c_lwswsp, 0b011, 8); ++ ++#undef FUNC ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, int32_t imm12) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_fldsdsp, 0b111, 9); ++ ++#undef FUNC ++ ++#define FUNC(NAME, REG_TYPE, funct3, bits) \ ++ bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ ++ return rs1->is_compressed_valid() && \ ++ rd_rs2->is_compressed_valid() && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_ldsd, Register, 0b111, 8); ++ FUNC(is_c_lwsw, Register, 0b011, 7); ++ FUNC(is_c_fldsd, FloatRegister, 0b111, 8); ++ ++#undef FUNC ++ ++public: ++// -------------------------- ++// Load/store register ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* lw -> c.lwsp/c.lw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, true)) { \ ++ c_lwsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_lw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _lw(Rd, Rs, offset); \ ++ } ++ ++ INSN(lw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* ld -> c.ldsp/c.ld */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ ++ c_ldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_ld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _ld(Rd, Rs, offset); \ ++ } ++ ++ INSN(ld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fld -> c.fldsp/c.fld */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fld(Rd, Rs, offset); \ ++ } ++ ++ INSN(fld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sd -> c.sdsp/c.sd */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ ++ c_sdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_sd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sd(Rd, Rs, offset); \ ++ } ++ ++ INSN(sd); ++ ++#undef INSN ++ ++// -------------------------- ++#define 
INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sw -> c.swsp/c.sw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, false)) { \ ++ c_swsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_sw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sw(Rd, Rs, offset); \ ++ } ++ ++ INSN(sw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fsd -> c.fsdsp/c.fsd */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fsdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fsd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fsd(Rd, Rs, offset); \ ++ } ++ ++ INSN(fsd); ++ ++#undef INSN ++ ++// -------------------------- ++// Conditional branch instructions ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ ++ /* beq/bne -> c.beqz/c.bnez */ \ ++ if (do_compress() && \ ++ (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ ++ is_imm_in_range(offset, 8, 1))) { \ ++ C_NAME(Rs1, offset); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rs1, Rs2, offset); \ ++ } ++ ++ INSN(beq, c_beqz, _beq); ++ INSN(bne, c_beqz, _bne); ++ ++#undef INSN ++ ++// -------------------------- ++// Unconditional branch instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, const int32_t offset) { \ ++ /* jal -> c.j */ \ ++ if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ ++ c_j(offset); \ ++ return; \ ++ } \ ++ _jal(Rd, offset); \ ++ } ++ ++ INSN(jal); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* jalr -> c.jr/c.jalr */ \ ++ if (do_compress() && (offset == 0 && Rs != x0)) { \ ++ if (Rd == x1) { \ ++ c_jalr(Rs); \ ++ return; \ ++ } else if (Rd == x0) { \ ++ c_jr(Rs); \ ++ return; \ ++ } \ ++ } \ ++ _jalr(Rd, Rs, offset); \ ++ } ++ ++ INSN(jalr); ++ ++#undef INSN ++ ++// -------------------------- ++// Miscellaneous Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* ebreak -> c.ebreak */ \ ++ if (do_compress()) { \ ++ c_ebreak(); \ ++ return; \ ++ } \ ++ _ebreak(); \ ++ } ++ ++ INSN(ebreak); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* The illegal instruction in RVC is presented by a 16-bit 0. 
*/ \ ++ if (do_compress()) { \ ++ emit_int16(0); \ ++ return; \ ++ } \ ++ _halt(); \ ++ } ++ ++ INSN(halt); ++ ++#undef INSN ++ ++// -------------------------- ++// Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int64_t imm) { \ ++ /* li -> c.li */ \ ++ if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ ++ c_li(Rd, imm); \ ++ return; \ ++ } \ ++ _li(Rd, imm); \ ++ } ++ ++ INSN(li); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ ++ if (do_compress()) { \ ++ if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ ++ c_addi(Rd, imm); \ ++ return; \ ++ } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ ++ c_mv(Rd, Rs1); \ ++ return; \ ++ } else if (Rs1 == sp && imm != 0) { \ ++ if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ ++ c_addi16sp(imm); \ ++ return; \ ++ } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ ++ c_addi4spn(Rd, imm); \ ++ return; \ ++ } \ ++ } \ ++ } \ ++ _addi(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addi); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addiw -> c.addiw */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ ++ c_addiw(Rd, imm); \ ++ return; \ ++ } \ ++ _addiw(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addiw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* and_imm12 -> c.andi */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ ++ c_andi(Rd, imm); \ ++ return; \ ++ } \ ++ _and_imm12(Rd, Rs1, imm); \ ++ } ++ ++ INSN(and_imm12); ++ ++#undef INSN ++ ++// -------------------------- ++// Shift Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* slli -> c.slli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ ++ c_slli(Rd, shamt); \ ++ return; \ ++ } \ ++ _slli(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(slli); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* srai/srli -> c.srai/c.srli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ ++ C_NAME(Rd, shamt); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(srai, c_srai, _srai); ++ INSN(srli, c_srli, _srli); ++ ++#undef INSN ++ ++// -------------------------- ++// Upper Immediate Instruction ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ /* lui -> c.lui */ \ ++ if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ ++ c_lui(Rd, imm); \ ++ return; \ ++ } \ ++ _lui(Rd, imm); \ ++ } ++ ++ INSN(lui); ++ ++#undef INSN ++ ++// --------------------------------------------------------------------------------------- ++ + void bgt(Register Rs, Register Rt, const address &dest); + void ble(Register Rs, Register Rt, const address &dest); + void bgtu(Register Rs, Register Rt, const address &dest); @@ -3273,25 +4084,17 @@ index 000000000..d4da30ed6 + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void 
wrap_label(Register r, Label &L, jal_jalr_insn insn); + -+ // Computational pseudo instructions ++ // calculate pseudoinstruction + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); -+ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); -+ ++ void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); -+ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + -+ Assembler(CodeBuffer* code) : AbstractAssembler(code) { -+ } -+ -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ ShouldNotCallThis(); -+ return RegisterOrConstant(); ++ Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + + // Stack overflow checking @@ -3301,34 +4104,25 @@ index 000000000..d4da30ed6 + return is_imm_in_range(imm, 12, 0); + } + -+ // The maximum range of a branch is fixed for the riscv -+ // architecture. ++ // The maximum range of a branch is fixed for the RISCV architecture. + static const unsigned long branch_range = 1 * M; + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + -+ static Assembler::SEW elemBytes_to_sew(int esize) { -+ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); -+ return (Assembler::SEW) exact_log2(esize); -+ } -+ + virtual ~Assembler() {} -+ +}; + -+class BiasedLockingCounters; -+ +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 000000000..82b825db7 +index 00000000000..7ffe8803985 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -3376,14 +4170,14 @@ index 000000000..82b825db7 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 000000000..d0ac7ef46 +index 00000000000..23d982f9abd --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,169 @@ +@@ -0,0 +1,167 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -3409,7 +4203,7 @@ index 000000000..d0ac7ef46 +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allocation.hpp" ++#include "memory/allStatic.hpp" + +class Bytes: AllStatic { + public: @@ -3457,7 +4251,6 @@ index 000000000..d0ac7ef46 + ((u8)(((u4*)p)[0])); + + case 2: -+ case 6: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | @@ -3471,7 +4264,7 @@ index 000000000..d0ac7ef46 + ((u8)(p[3]) << 24) | + ((u8)(p[2]) << 16) | + ((u8)(p[1]) << 8) | -+ (u8)(p[0]); ++ ((u8)(p[0])); + } + } + @@ -3516,7 +4309,6 @@ index 000000000..d0ac7ef46 + break; + + case 2: -+ case 6: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; @@ -3546,17 +4338,17 @@ index 000000000..d0ac7ef46 + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } +}; + -+#include OS_CPU_HEADER_INLINE(bytes) ++#include OS_CPU_HEADER(bytes) + +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 000000000..522eedd29 +index 00000000000..dcd0472c540 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,352 @@ +@@ -0,0 +1,353 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -3588,6 +4380,7 @@ index 000000000..522eedd29 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" +#include "nativeInst_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" @@ -3595,8 +4388,21 @@ index 000000000..522eedd29 + +#define __ ce->masm()-> + -+void CounterOverflowStub::emit_code(LIR_Assembler* ce) -+{ ++void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); ++ __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); ++ __ la(t0, safepoint_pc.target()); ++ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ ++ __ far_jump(RuntimeAddress(stub)); ++} ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(t0, m); @@ -3608,22 +4414,19 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array) -+ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -+{ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -+{ ++ : 
_index(index), _array(), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + -+void RangeCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -3643,7 +4446,7 @@ index 000000000..522eedd29 + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { -+ assert(_array != NULL, "sanity"); ++ assert(_array != LIR_Opr::nullOpr(), "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } @@ -3655,13 +4458,11 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -+{ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + -+void PredicateFailedStub::emit_code(LIR_Assembler* ce) -+{ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); @@ -3670,8 +4471,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void DivByZeroStub::emit_code(LIR_Assembler* ce) -+{ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } @@ -3685,21 +4485,19 @@ index 000000000..522eedd29 +} + +// Implementation of NewInstanceStub -+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -+{ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); -+ assert(stub_id == Runtime1::new_instance_id || -+ stub_id == Runtime1::fast_new_instance_id || ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + -+void NewInstanceStub::emit_code(LIR_Assembler* ce) -+{ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mv(x13, _klass_reg->as_register()); @@ -3711,16 +4509,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewTypeArrayStub -+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + -+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3733,16 +4529,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewObjectArrayStub -+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr 
result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + -+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3756,13 +4550,11 @@ index 000000000..522eedd29 + +// Implementation of MonitorAccessStubs +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -+: MonitorAccessStub(obj_reg, lock_reg) -+{ ++: MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + -+void MonitorEnterStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); @@ -3779,8 +4571,7 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+void MonitorExitStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -3798,18 +4589,23 @@ index 000000000..522eedd29 + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + -+// RISCV don't use C1 runtime patching. When need patch, just deoptimize. -+void PatchingStub::emit_code(LIR_Assembler* ce) -+{ ++void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + -+void DeoptimizeStub::emit_code(LIR_Assembler* ce) -+{ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -3817,8 +4613,7 @@ index 000000000..522eedd29 + DEBUG_ONLY(__ should_not_reach_here()); +} + -+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. 
@@ -3835,8 +4630,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -+{ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); @@ -3845,32 +4639,29 @@ index 000000000..522eedd29 + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } -+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + -+void ArrayCopyStub::emit_code(LIR_Assembler* ce) -+{ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. -+ // + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -+ SharedRuntime::java_calling_convention(signature, args, args_num, true); ++ SharedRuntime::java_calling_convention(signature, args, args_num); + + // push parameters + Register r[args_num]; -+ int i = 0; -+ r[i++] = src()->as_register(); -+ r[i++] = src_pos()->as_register(); -+ r[i++] = dst()->as_register(); -+ r[i++] = dst_pos()->as_register(); -+ r[i++] = length()->as_register(); ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { @@ -3879,7 +4670,7 @@ index 000000000..522eedd29 + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { -+ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); ++ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + @@ -3899,8 +4690,10 @@ index 000000000..522eedd29 + ce->add_call_info_here(info()); + +#ifndef PRODUCT -+ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); -+ __ incrementw(Address(t1)); ++ if (PrintC1Statistics) { ++ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ add_memory_int32(Address(t1), 1); ++ } +#endif + + __ j(_continuation); @@ -3909,13 +4702,12 @@ index 000000000..522eedd29 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 000000000..a0f411352 +index 00000000000..4417ad63091 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,84 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -3973,7 +4765,7 @@ index 000000000..a0f411352 + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan -+ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, @@ -4000,13 +4792,12 @@ index 000000000..a0f411352 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 000000000..d4876625c +index 00000000000..e3a2606c532 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp -@@ -0,0 +1,31 @@ +@@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4037,13 +4828,12 @@ index 000000000..d4876625c +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 000000000..4b43bc4d7 +index 00000000000..7bc3d311501 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4076,13 +4866,12 @@ index 000000000..4b43bc4d7 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 000000000..94b4e0f0b +index 00000000000..172031941b2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -0,0 +1,391 @@ +@@ -0,0 +1,388 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4112,8 +4901,7 @@ index 000000000..94b4e0f0b +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + -+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -+{ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); @@ -4129,7 +4917,7 @@ index 000000000..94b4e0f0b + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); -+ } else if (type == T_OBJECT || type == T_ARRAY) { ++ } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); @@ -4240,8 +5028,8 @@ index 000000000..94b4e0f0b +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; + +//-------------------------------------------------------- +// FrameMap @@ -4398,7 +5186,7 @@ index 000000000..94b4e0f0b + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { @@ -4413,7 +5201,7 @@ index 000000000..94b4e0f0b + + +// ----------------mapping----------------------- -+// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + @@ -4430,7 +5218,7 @@ index 000000000..94b4e0f0b +// | .........| <- TOS +// | locals | +// +----------+ -+// | old fp, | ++// | old fp, | +// +----------+ +// | ret addr | +// +----------+ @@ -4458,8 +5246,7 @@ index 000000000..94b4e0f0b + return as_FloatRegister(n)->as_VMReg(); +} + -+LIR_Opr FrameMap::stack_pointer() -+{ ++LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + @@ -4473,13 +5260,12 @@ index 000000000..94b4e0f0b +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 000000000..f600c2f6f +index 00000000000..01281f5c9e1 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,148 @@ +/* -+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4628,13 +5414,12 @@ index 000000000..f600c2f6f +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 000000000..a846d60ae +index 00000000000..4c1c13dc290 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,287 @@ +@@ -0,0 +1,281 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4671,16 +5456,15 @@ index 000000000..a846d60ae + +#define __ _masm-> + -+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { -+ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, ++ LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); -+ -+ // operand check -+ assert(left->is_single_cpu(), "left must be register"); -+ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); -+ assert(result->is_single_cpu(), "result must be register"); ++ // opreand check ++ assert(left->is_single_cpu(), "left must be a register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); ++ assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + @@ -4754,7 +5538,7 @@ index 000000000..a846d60ae + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { @@ -4762,7 +5546,7 @@ index 000000000..a846d60ae + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + default: + ShouldNotReachHere(); + } @@ -4817,7 +5601,7 @@ index 000000000..a846d60ae + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { -+ case lir_add: ++ case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); @@ -4831,7 +5615,7 @@ index 000000000..a846d60ae + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { @@ -4849,7 +5633,7 @@ index 000000000..a846d60ae + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); @@ -4874,9 +5658,7 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -4889,9 +5671,7 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), 
left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -4921,13 +5701,12 @@ index 000000000..a846d60ae +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 000000000..93530ef58 +index 00000000000..ab0a9963fc1 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp -@@ -0,0 +1,36 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4950,6 +5729,7 @@ index 000000000..93530ef58 + * questions. + * + */ ++ +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + @@ -4960,17 +5740,17 @@ index 000000000..93530ef58 + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 000000000..31f8d6a4a +index 00000000000..b7f53e395f3 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp -@@ -0,0 +1,387 @@ +@@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -4999,6 +5779,7 @@ index 000000000..31f8d6a4a +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -5026,7 +5807,7 @@ index 000000000..31f8d6a4a + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); @@ -5064,14 +5845,14 @@ index 000000000..31f8d6a4a + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } @@ -5133,7 +5914,7 @@ index 000000000..31f8d6a4a + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif @@ -5142,7 +5923,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); @@ -5214,6 +5995,7 @@ index 000000000..31f8d6a4a +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); ++ + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the @@ -5269,7 +6051,7 @@ index 000000000..31f8d6a4a + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; -+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { @@ -5292,7 +6074,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); @@ -5356,13 +6138,12 @@ index 000000000..31f8d6a4a +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 000000000..872fd2ef6 +index 00000000000..06a0f248ca6 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -5388,6 +6169,7 @@ index 000000000..872fd2ef6 + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++ + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); @@ -5410,17 +6192,18 @@ index 000000000..872fd2ef6 + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 000000000..222e3e97e +index 00000000000..742c2126e60 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2275 @@ +@@ -0,0 +1,2267 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -5455,14 +6238,12 @@ index 000000000..222e3e97e +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -5512,6 +6293,17 @@ index 000000000..222e3e97e + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + ++void LIR_Assembler::clinit_barrier(ciMethod* method) { ++ assert(VM_Version::supports_fast_class_init_checks(), "sanity"); ++ assert(!method->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(t1, method->holder()->constant_encoding()); ++ __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(L_skip_barrier); ++} + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; @@ -5521,25 +6313,11 @@ index 000000000..222e3e97e + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + -+//--------------fpu register translations----------------------- -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -+//------------------------------------------- + +static jlong as_long(LIR_Opr data) { + jlong result; @@ -5557,6 +6335,43 @@ index 000000000..222e3e97e + return result; +} + ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ if (addr->base()->is_illegal()) { ++ assert(addr->index()->is_illegal(), "must be illegal too"); ++ __ movptr(tmp, addr->disp()); ++ return Address(tmp, 0); ++ } ++ ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr index_opr = addr->index(); ++ ++ if (index_opr->is_illegal()) { ++ return Address(base, addr->disp()); ++ } ++ ++ int scale = addr->scale(); ++ if (index_opr->is_cpu_register()) { ++ Register index; ++ if (index_opr->is_single_cpu()) { ++ index = index_opr->as_register(); ++ } else { ++ index = index_opr->as_register_lo(); ++ } ++ if (scale != 0) { ++ __ shadd(tmp, index, base, tmp, scale); ++ } else { ++ __ add(tmp, base, index); ++ } ++ return Address(tmp, addr->disp()); ++ } else if (index_opr->is_constant()) { ++ intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); ++ return Address(base, addr_offset); ++ } ++ ++ Unimplemented(); ++ return Address(); ++} ++ +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); @@ -5572,7 +6387,7 @@ index 000000000..222e3e97e + +// Ensure a valid Address (base + offset) to a stack-slot. 
If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address -+// calculation to hold the address in a temporary register. ++// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); @@ -5690,10 +6505,7 @@ index 000000000..222e3e97e +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! + -+ // The frame_map records size in slots (32bit word) -+ -+ // subtract two words to account for return address and link -+ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { @@ -5757,7 +6569,11 @@ index 000000000..222e3e97e + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -+ __ unlock_object(x15, x14, x10, *stub->entry()); ++ if (UseHeavyMonitors) { ++ __ j(*stub->entry()); ++ } else { ++ __ unlock_object(x15, x14, x10, *stub->entry()); ++ } + __ bind(*stub->continuation()); + } + @@ -5810,7 +6626,7 @@ index 000000000..222e3e97e + return offset; +} + -+void LIR_Assembler::return_op(LIR_Opr result) { ++void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code @@ -5820,20 +6636,18 @@ index 000000000..222e3e97e + __ reserved_stack_check(); + } + -+ address polling_page(os::get_polling_page()); -+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); ++ code_stub->set_safepoint_offset(__ offset()); ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { -+ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); -+ assert(os::is_poll_address(polling_page), "should be"); -+ int32_t offset = 0; -+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); ++ __ get_polling_page(t0, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map -+ __ read_polling_page(t0, offset, relocInfo::poll_type); ++ __ read_polling_page(t0, 0, relocInfo::poll_type); + return __ offset(); +} + @@ -6007,7 +6821,7 @@ index 000000000..222e3e97e + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { -+ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; @@ -6064,8 +6878,7 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ 
-6075,7 +6888,7 @@ index 000000000..222e3e97e + return; + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { @@ -6187,8 +7000,7 @@ index 000000000..222e3e97e + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool wide, bool /* unaligned */) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -6233,11 +7045,7 @@ index 000000000..222e3e97e + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: -+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ __ lwu(dest->as_register(), as_Address(from_addr)); -+ } else { -+ __ ld(dest->as_register(), as_Address(from_addr)); -+ } ++ __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); @@ -6261,21 +7069,21 @@ index 000000000..222e3e97e + ShouldNotReachHere(); + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } -+ __ verify_oop(dest->as_register()); -+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ if (UseCompressedClassPointers) { -+ __ decode_klass_not_null(dest->as_register()); ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); + } + } +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { -+ case lir_idiv: ++ case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), @@ -6311,13 +7119,11 @@ index 000000000..222e3e97e + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack -+ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack -+ false, // unaligned + false); // wide + __ bind(done); +} @@ -6431,8 +7237,8 @@ index 000000000..222e3e97e + Register len = op->len()->as_register(); + + if (UseSlowPath || -+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || -+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ j(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); @@ -6467,7 +7273,7 @@ index 000000000..222e3e97e + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } @@ -6479,7 +7285,7 @@ index 000000000..222e3e97e + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); -+ __ mv(t1, DataLayout::counter_increment); ++ __ li(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, 
md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); @@ -6505,7 +7311,7 @@ index 000000000..222e3e97e + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit -+ __ ld(t0, Address(klass_RInfo, long(k->super_check_offset()))); ++ __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne(k_RInfo, t0, *failure_target, /* is_far */ true); + // successful cast, fall through to profile or jump @@ -6550,10 +7356,7 @@ index 000000000..222e3e97e + // Object is null, update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); -+ Address data_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ lbu(t0, data_addr); + __ ori(t0, t0, BitData::null_seen_byte_constant()); + __ sb(t0, data_addr); @@ -6667,7 +7470,7 @@ index 000000000..222e3e97e + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); -+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); @@ -6758,7 +7561,12 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::align_call(LIR_Code code) { } ++void LIR_Assembler::align_call(LIR_Code code) { ++ // With RVC a call instruction may get 2-byte aligned. ++ // The address of the call instruction needs to be 4-byte aligned to ++ // ensure that it does not span a cache line so that it can be patched. ++ __ align(4); ++} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -6778,10 +7586,9 @@ index 000000000..222e3e97e + add_call_info(code_offset(), op->info()); +} + -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } -+ +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); ++ assert((__ offset() % 4) == 0, "bad alignment"); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); @@ -6793,7 +7600,8 @@ index 000000000..222e3e97e + __ relocate(static_stub_Relocation::spec(call_pc)); + __ emit_static_call_stub(); + -+ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() ++ <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + @@ -6838,7 +7646,6 @@ index 000000000..222e3e97e + __ j(_unwind_handler_entry); +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6866,7 +7673,6 @@ index 000000000..222e3e97e + } +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register left_reg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6901,22 +7707,16 @@ index 000000000..222e3e97e + } +} + -+ -+ +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); -+ if (!UseFastLocking) { ++ if (UseHeavyMonitors) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { -+ Register scratch = noreg; -+ if (UseBiasedLocking) { -+ scratch = op->scratch_opr()->as_register(); -+ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible -+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } @@ -6929,6 +7729,23 @@ index 000000000..222e3e97e + __ bind(*op->stub()->continuation()); +} + ++void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { ++ Register obj = op->obj()->as_pointer_register(); ++ Register result = op->result_opr()->as_pointer_register(); ++ ++ CodeEmitInfo* info = op->info(); ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ ++ if (UseCompressedClassPointers) { ++ __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); ++ __ decode_klass_not_null(result); ++ } else { ++ __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++} ++ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); @@ -6962,7 +7779,7 @@ index 000000000..222e3e97e + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6978,7 +7795,7 @@ index 000000000..222e3e97e + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6988,13 +7805,13 @@ index 000000000..222e3e97e + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
-+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call -+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + } +} + @@ -7029,7 +7846,7 @@ index 000000000..222e3e97e + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); -+ __ mv(t0, (u1)TypeEntries::null_seen); ++ __ li(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -7079,7 +7896,7 @@ index 000000000..222e3e97e + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); -+ __ mv(t1, (u1)TypeEntries::null_seen); ++ __ li(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); @@ -7199,32 +8016,30 @@ index 000000000..222e3e97e + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC && patch_code != lir_patch_none) { ++ if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } -+#endif -+ assert(patch_code == lir_patch_none, "Patch code not supported"); ++ + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + assert_different_registers(dst, t0); -+ if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { -+ ++ if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { ++ int scale = adr->scale(); + intptr_t offset = adr->disp(); + LIR_Opr index_op = adr->index(); -+ int scale = adr->scale(); -+ if(index_op->is_constant()) { ++ if (index_op->is_constant()) { + offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; + } + -+ if(!is_imm_in_range(offset, 12, 0)) { ++ if (!is_imm_in_range(offset, 12, 0)) { + __ la(t0, as_Address(adr)); + __ mv(dst, t0); + return; + } + } ++ + __ la(dst, as_Address(adr)); +} + @@ -7248,8 +8063,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, -+ /* unaligned */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -7326,7 +8140,7 @@ index 000000000..222e3e97e +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + + get_op(type); + @@ -7376,41 +8190,6 @@ index 000000000..222e3e97e + return exact_log2(elem_size); +} + -+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { -+ if (addr->base()->is_illegal()) { -+ assert(addr->index()->is_illegal(), "must be illegal too"); -+ __ movptr(tmp, addr->disp()); -+ return Address(tmp, 0); -+ } -+ -+ Register base = addr->base()->as_pointer_register(); -+ LIR_Opr index_op = addr->index(); -+ int scale = addr->scale(); -+ -+ if (index_op->is_illegal()) { -+ return Address(base, addr->disp()); -+ } else if 
(index_op->is_cpu_register()) { -+ Register index; -+ if (index_op->is_single_cpu()) { -+ index = index_op->as_register(); -+ } else { -+ index = index_op->as_register_lo(); -+ } -+ if (scale != 0) { -+ __ shadd(tmp, index, base, tmp, scale); -+ } else { -+ __ add(tmp, base, index); -+ } -+ return Address(tmp, addr->disp()); -+ } else if (index_op->is_constant()) { -+ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); -+ return Address(base, addr_offset); -+ } -+ -+ Unimplemented(); -+ return Address(); -+} -+ +// helper functions which checks for overflow and sets bailout if it +// occurs. Always returns a valid embeddable pointer but in the +// bailout case the pointer won't be to unique storage. @@ -7444,16 +8223,6 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { -+ _masm->code_section()->relocate(adr, relocInfo::poll_type); -+ int pc_offset = code_offset(); -+ flush_debug_info(pc_offset); -+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); -+ if (info->exception_handlers() != NULL) { -+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); -+ } -+} -+ +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); @@ -7498,7 +8267,6 @@ index 000000000..222e3e97e + add_call_info_here(info); +} + -+ +void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { + Label ok; + __ load_klass(tmp, tmp); @@ -7588,6 +8356,16 @@ index 000000000..222e3e97e + __ bind(done); +} + ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { @@ -7602,10 +8380,7 @@ index 000000000..222e3e97e + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); -+ Address counter_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t0, counter_addr); + __ addi(t0, t0, -DataLayout::counter_increment); + __ sd(t0, counter_addr); @@ -7687,21 +8462,21 @@ index 000000000..222e3e97e + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); -+ __ mv(t0, c); ++ __ li(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 000000000..11a47fd6e +index 00000000000..051328c3a8a --- /dev/null +++ 
b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -0,0 +1,132 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -7743,9 +8518,6 @@ index 000000000..11a47fd6e + + Address as_Address(LIR_Address* addr, Register tmp); + -+ // Ensure we have a valid Address (base+offset) to a stack-slot. -+ Address stack_slot_address(int index, uint shift, int adjust = 0); -+ + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. @@ -7753,6 +8525,9 @@ index 000000000..11a47fd6e + address double_constant(double d); + address int_constant(jlong n); + ++ // Ensure we have a valid Address (base + offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, @@ -7768,17 +8543,15 @@ index 000000000..11a47fd6e + + void deoptimize_trap(CodeEmitInfo *info); + -+ enum -+ { -+ // see emit_static_call_stub for detail: ++ enum { ++ // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), -+ _call_aot_stub_size = 0, -+ // see emit_exception_handler for detail: ++ // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -+ // see emit_deopt_handler for detail ++ // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller @@ -7789,10 +8562,12 @@ index 000000000..11a47fd6e + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); ++ + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); ++ + void get_op(BasicType type); + + // emit_typecheck_helper sub functions @@ -7832,12 +8607,12 @@ index 000000000..11a47fd6e +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 000000000..8ba9ed66d +index 00000000000..e126f148cdf --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1083 @@ +@@ -0,0 +1,1075 @@ +/* -+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -7876,6 +8651,7 @@ index 000000000..8ba9ed66d +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT @@ -7980,7 +8756,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->as_constant() != NULL) { + long constant = 0; @@ -7996,7 +8771,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} @@ -8004,7 +8778,7 @@ index 000000000..8ba9ed66d +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); -+ ++ + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + jlong c; @@ -8031,17 +8805,22 @@ index 000000000..8ba9ed66d + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); -+ + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; + switch (type) { -+ case T_LONG: return LIR_OprFact::longConst(x); -+ case T_INT: return LIR_OprFact::intConst(x); -+ default: ShouldNotReachHere(); ++ case T_LONG: ++ r = LIR_OprFact::longConst(x); ++ break; ++ case T_INT: ++ r = LIR_OprFact::intConst(x); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ return NULL; ++ return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -8111,11 +8890,6 @@ index 000000000..8ba9ed66d + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); -+ // Need a tmp register for biased locking -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (UseBiasedLocking) { -+ tmp = new_register(T_INT); -+ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { @@ -8124,7 +8898,7 @@ index 000000000..8ba9ed66d + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), tmp, ++ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, + x->monitor_no(), info_for_exception, info); +} + @@ -8194,12 +8968,7 @@ index 000000000..8ba9ed66d + right.load_item(); + + LIR_Opr reg = rlock(x); -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { -+ tmp = new_register(T_DOUBLE); -+ } -+ -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); + + set_result(x, round_item(reg)); +} @@ -8208,7 +8977,7 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap -+ LIRItem left(x->x(), this); ++ LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == 
Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -8232,7 +9001,7 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -8306,16 +9075,16 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; -+ + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } ++ + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || @@ -8389,7 +9158,7 @@ index 000000000..8ba9ed66d + left.load_item(); + rlock_result(x); + ValueTag tag = right.type()->tag(); -+ if(right.is_constant() && ++ if (right.is_constant() && + ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { + right.dont_load_item(); @@ -8438,7 +9207,7 @@ index 000000000..8ba9ed66d + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); -+ if (type == T_OBJECT || type == T_ARRAY) { ++ if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); @@ -8452,7 +9221,7 @@ index 000000000..8ba9ed66d +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); @@ -8485,14 +9254,16 @@ index 000000000..8ba9ed66d + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through -+ case vmIntrinsics::_dsqrt: { ++ case vmIntrinsics::_dsqrt: // fall through ++ case vmIntrinsics::_dsqrt_strict: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { -+ case vmIntrinsics::_dsqrt: { ++ case vmIntrinsics::_dsqrt: // fall through ++ case vmIntrinsics::_dsqrt_strict: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } @@ -8892,9 +9663,9 @@ index 000000000..8ba9ed66d + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { -+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), x->tsux(), x->usux()); + } else { -+ __ branch(lir_cond(cond), right->type(), x->tsux()); ++ __ branch(lir_cond(cond), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); @@ -8913,20 +9684,16 @@ 
index 000000000..8ba9ed66d + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { -+ if (!UseBarriersForVolatile) { -+ __ membar(); -+ } -+ + __ volatile_load_mem_reg(address, result, info); +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 000000000..00e33e882 +index 00000000000..5f1c394ab3d --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -8954,40 +9721,40 @@ index 000000000..00e33e882 +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + -+FloatRegister LIR_OprDesc::as_float_reg() const { ++FloatRegister LIR_Opr::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + -+FloatRegister LIR_OprDesc::as_double_reg() const { ++FloatRegister LIR_Opr::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | -+ (reg1 << LIR_OprDesc::reg2_shift) | -+ LIR_OprDesc::double_type | -+ LIR_OprDesc::fpu_register | -+ LIR_OprDesc::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | ++ (reg1 << LIR_Opr::reg2_shift) | ++ LIR_Opr::double_type | ++ LIR_Opr::fpu_register | ++ LIR_Opr::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); -+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, -+ "wrong type for addresses"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || ++ base()->type() == T_METADATA, "wrong type for addresses"); +} +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 000000000..60dcdc0e1 +index 00000000000..78a61128bdd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -9021,14 +9788,14 @@ index 000000000..60dcdc0e1 +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 000000000..f0aa08a39 +index 00000000000..d7ca7b0fd05 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -9063,7 +9830,6 @@ index 000000000..f0aa08a39 + return 1; +} + -+ +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} @@ -9085,8 +9851,8 @@ index 000000000..f0aa08a39 + return false; +} + -+ +inline void LinearScan::pd_add_temps(LIR_Op* op) { ++ // No special case behaviours yet +} + + @@ -9099,8 +9865,8 @@ index 000000000..f0aa08a39 + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; -+ } else if (cur->type() == T_INT || cur->type() == T_LONG || -+ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; @@ -9108,18 +9874,17 @@ index 000000000..f0aa08a39 + return false; +} + -+ +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..370ec45c6 +index 00000000000..6f656c8c533 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,441 @@ +@@ -0,0 +1,432 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9143,15 +9908,16 @@ index 000000000..370ec45c6 + */ + +#include "precompiled.hpp" ++#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" ++#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -9167,7 +9933,7 @@ index 000000000..370ec45c6 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9179,17 +9945,19 @@ index 000000000..370ec45c6 + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + -+ if (UseBiasedLocking) { -+ assert(tmp != noreg, "should have tmp register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); -+ } else { -+ null_check_offset = offset(); ++ null_check_offset = offset(); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(hdr, obj); ++ lwu(hdr, Address(hdr, Klass::access_flags_offset())); ++ andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); ++ bnez(t0, slow_case, true /* is_far */); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked -+ ori(hdr, hdr, markOopDesc::unlocked_value); ++ ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the @@ -9212,7 +9980,7 @@ index 000000000..370ec45c6 + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); -+ mv(t0, aligned_mask - os::vm_page_size()); ++ li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) @@ -9220,10 +9988,6 @@ index 000000000..370ec45c6 + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); -+ if (PrintBiasedLockingStatistics) { -+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ incrementw(Address(t1, 0)); -+ } + return null_check_offset; +} + @@ -9233,21 +9997,13 @@ index 000000000..370ec45c6 + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + -+ if (UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ biased_locking_exit(obj, hdr, done); -+ } -+ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); -+ if (!UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ } ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to @@ -9274,13 +10030,8 @@ index 000000000..370ec45c6 + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); -+ if (UseBiasedLocking && !len->is_valid()) { -+ assert_different_registers(obj, klass, len, tmp1, tmp2); -+ ld(tmp1, Address(klass, Klass::prototype_header_offset())); -+ } else { -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ } ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass @@ -9298,7 +10049,7 @@ index 000000000..370ec45c6 +} + +// preserves obj, destroys len_in_bytes -+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + @@ -9310,7 +10061,7 @@ index 000000000..370ec45c6 + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } -+ zero_memory(obj, len_in_bytes, tmp1); ++ zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } @@ -9434,24 +10185,29 @@ index 000000000..370ec45c6 +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. 
For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. -+ // Note that we do this before doing an enter(). ++ // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); -+ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multipler for wordSize ++ MacroAssembler::build_frame(framesize); ++ ++ // Insert nmethod entry barrier into frame. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { -+ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiper for wordSize ++ MacroAssembler::remove_frame(framesize); +} + + -+void C1_MacroAssembler::verified_entry() { ++void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ ++ nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { @@ -9539,9 +10295,9 @@ index 000000000..370ec45c6 + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { -+ oop_equal(op1, op2, label, is_far); ++ beq(op1, op2, label, is_far); + } else { -+ oop_nequal(op1, op2, label, is_far); ++ bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), @@ -9559,14 +10315,14 @@ index 000000000..370ec45c6 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5d0cefe89 +index 00000000000..dfd3c17d7c7 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -0,0 +1,121 @@ +@@ -0,0 +1,120 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9614,7 +10370,7 @@ index 000000000..5d0cefe89 + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); -+ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, @@ -9624,9 +10380,8 @@ index 000000000..5d0cefe89 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved -+ // tmp : temporary register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -9686,14 +10441,14 @@ index 000000000..5d0cefe89 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 000000000..f06e7b51c +index 00000000000..f523c9ed50a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1206 @@ +@@ -0,0 +1,1172 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9723,9 +10478,11 @@ index 000000000..f06e7b51c +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" @@ -9733,18 +10490,20 @@ index 000000000..f06e7b51c +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + +// Implementation of StubAssembler + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers -+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); -+ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different"); ++ assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + @@ -9780,7 +10539,7 @@ index 000000000..f06e7b51c + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared -+ if (oop_result1->is_valid()) { ++ if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { @@ -9797,8 +10556,8 @@ index 000000000..f06e7b51c + bind(L); + } + // get oop results if there are any and reset the values in the thread -+ if (oop_result1->is_valid()) { -+ get_vm_result(oop_result1, xthread); ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); @@ -9806,12 +10565,12 @@ index 000000000..f06e7b51c + return call_offset; +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); -+ return call_RT(oop_result1, metadata_result, entry, 1); ++ return call_RT(oop_result, metadata_result, entry, 1); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { @@ -9826,10 +10585,10 @@ index 000000000..f06e7b51c + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { ++int StubAssembler::call_RT(Register oop_result, 
Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || @@ -9838,31 +10597,36 @@ index 000000000..f06e7b51c + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; -+ addi(sp, sp, -(arg_num * wordSize)); -+ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); -+ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ addi(sp, sp, -(arg_num + 1) * wordSize); + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); ++ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); -+ addi(sp, sp, arg_num * wordSize); ++ addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; ++ bool _return_state; + + public: -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); @@ -9880,8 +10644,9 @@ index 000000000..f06e7b51c + +#define __ _sasm-> + -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { + _sasm = sasm; ++ _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + @@ -9893,7 +10658,11 @@ index 000000000..f06e7b51c + + +StubFrame::~StubFrame() { -+ __ epilogue(); ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } + _sasm = NULL; +} + @@ -9919,7 +10688,7 @@ index 000000000..f06e7b51c +}; + +// Save off registers which might be killed by calls into the runtime. -+// Tries to smart of about FP registers. In particular we separate ++// Tries to smart of about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. @@ -9936,11 +10705,12 @@ index 000000000..f06e7b51c + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + -+ // cpu_regs, caller save registers only, see FrameMap::initialize ++ // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. 
-+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, -+ x13, x14, x15, x16, x17, -+ x28, x29, x30, x31}; ++ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { ++ x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 ++ }; ++ + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -10055,7 +10825,6 @@ index 000000000..f06e7b51c + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + -+ __ should_not_reach_here(); + return oop_maps; +} + @@ -10103,9 +10872,7 @@ index 000000000..f06e7b51c + sasm->set_frame_size(frame_size); + break; + } -+ default: -+ __ should_not_reach_here(); -+ break; ++ default: ShouldNotReachHere(); + } + + // verify that only x10 and x13 are valid at this time @@ -10161,11 +10928,8 @@ index 000000000..f06e7b51c + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: -+ // Pop the return address. -+ __ leave(); -+ __ ret(); // jump to exception handler + break; -+ default: ShouldNotReachHere(); ++ default: ShouldNotReachHere(); + } + + return oop_maps; @@ -10268,80 +11032,37 @@ index 000000000..f06e7b51c +#endif + __ reset_last_Java_frame(true); + -+ // check for pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // exception pending => remove activation and forward to exception handler -+ -+ { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? -+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); -+ __ bind(L1); -+ } -+ -+ // the deopt blob expects exceptions in the special fields of -+ // JavaThread, so copy and clear pending exception. -+ -+ // load and clear pending exception -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+ // check that there is really a valid exception -+ __ verify_not_null_oop(x10); -+ -+ // load throwing pc: this is the return address of the stub -+ __ ld(x13, Address(fp, wordSize)); -+ +#ifdef ASSERT -+ // check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); + -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); +#endif + -+ // store exception oop and throwing pc to JavaThread -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ -+ restore_live_registers(sasm); -+ -+ __ leave(); -+ -+ // Forward the exception directly to deopt blob. We can blow no -+ // registers and must leave throwing pc on the stack. 
A patch may -+ // have values live in registers so the entry point with the -+ // exception in tls. -+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); -+ -+ __ bind(L); -+ } -+ -+ // Runtime will return true if the nmethod has been deoptimized during -+ // the patching process. In that case we must do a deopt reexecute instead. -+ Label cont; ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(x10, no_deopt); // Have we deoptimized? + -+ __ beqz(x10, cont); // have we deoptimized? ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frames but the return ++ // address and jump to the deopt blob. + -+ // Will reexecute. Proper return address is already on the stack we just restore -+ // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + -+ __ bind(cont); -+ restore_live_registers(sasm); -+ __ leave(); -+ __ ret(); ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); + + return oop_maps; +} @@ -10367,13 +11088,13 @@ index 000000000..f06e7b51c + + case throw_div0_exception_id: + { -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; @@ -10652,14 +11373,14 @@ index 000000000..f06e7b51c + + case throw_class_cast_exception_id: + { -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } @@ -10693,7 +11414,7 @@ index 000000000..f06e7b51c + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); @@ -10753,7 +11474,7 @@ index 000000000..f06e7b51c + + case deoptimize_id: + { -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); @@ -10772,7 +11493,7 @@ index 000000000..f06e7b51c + + case throw_range_check_failed_id: + { -+ StubFrame f(sasm, 
"range_check_failed", dont_gc_arguments); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; @@ -10788,7 +11509,7 @@ index 000000000..f06e7b51c + + case access_field_patching_id: + { -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } @@ -10796,7 +11517,7 @@ index 000000000..f06e7b51c + + case load_klass_patching_id: + { -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } @@ -10804,7 +11525,7 @@ index 000000000..f06e7b51c + + case load_mirror_patching_id: + { -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } @@ -10812,7 +11533,7 @@ index 000000000..f06e7b51c + + case load_appendix_patching_id: + { -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } @@ -10835,14 +11556,14 @@ index 000000000..f06e7b51c + + case throw_index_exception_id: + { -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); @@ -10851,7 +11572,7 @@ index 000000000..f06e7b51c + + case predicate_failed_trap_id: + { -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); @@ -10874,7 +11595,7 @@ index 000000000..f06e7b51c + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); + + restore_live_registers(sasm); + } @@ -10882,8 +11603,8 @@ index 000000000..f06e7b51c + + default: + { -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); -+ __ mv(x10, (int)id); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } @@ -10898,14 +11619,13 @@ index 000000000..f06e7b51c +const char 
*Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 000000000..974c8fe76 +index 00000000000..fe46f7b21c8 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,65 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -10937,10 +11657,8 @@ index 000000000..974c8fe76 +// Sets the default values for platform dependent flags used by the client compiler. +// (see c1_globals.hpp) + -+#ifndef TIERED ++#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); -+define_pd_global(bool, UseTLAB, true ); -+define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -10949,7 +11667,6 @@ index 000000000..974c8fe76 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); -+define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -10960,126 +11677,25 @@ index 000000000..974c8fe76 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); -+#endif // !TIERED ++#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); -+define_pd_global(bool, RoundFPResults, true ); + -+define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); -+define_pd_global(bool, TwoOperandLIRForm, false ); ++define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -new file mode 100644 -index 000000000..bf4efa629 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP -+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP -+ -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" -+ -+// Sets the default values for platform dependent flags used by the server compiler. -+// (see c2_globals.hpp). Alpha-sorted. -+ -+define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, UseTLAB, true); -+define_pd_global(bool, ResizeTLAB, true); -+define_pd_global(bool, CICompileOSR, true); -+define_pd_global(bool, InlineIntrinsics, true); -+define_pd_global(bool, PreferInterpreterNativeStubs, false); -+define_pd_global(bool, ProfileTraps, true); -+define_pd_global(bool, UseOnStackReplacement, true); -+define_pd_global(bool, ProfileInterpreter, true); -+define_pd_global(bool, TieredCompilation, trueInTiered); -+define_pd_global(intx, CompileThreshold, 10000); -+ -+define_pd_global(intx, OnStackReplacePercentage, 140); -+define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 64); -+define_pd_global(intx, FreqInlineSize, 325); -+define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, INTPRESSURE, 24); -+define_pd_global(intx, InteriorEntryAlignment, 16); -+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); -+define_pd_global(intx, LoopUnrollLimit, 60); -+define_pd_global(intx, LoopPercentProfileLimit, 10); -+// InitialCodeCacheSize derived from specjbb2000 run. -+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize -+define_pd_global(intx, CodeCacheExpansionSize, 64*K); -+ -+// Ergonomics related flags -+define_pd_global(uint64_t,MaxRAM, 128ULL*G); -+define_pd_global(intx, RegisterCostAreaRatio, 16000); -+ -+// Peephole and CISC spilling both break the graph, and so makes the -+// scheduler sick. -+define_pd_global(bool, OptoPeephole, false); -+define_pd_global(bool, UseCISCSpill, false); -+define_pd_global(bool, OptoScheduling, true); -+define_pd_global(bool, OptoBundling, false); -+define_pd_global(bool, OptoRegScheduling, false); -+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); -+define_pd_global(bool, IdealizeClearArrayNode, true); -+ -+define_pd_global(intx, ReservedCodeCacheSize, 48*M); -+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); -+define_pd_global(intx, ProfiledCodeHeapSize, 22*M); -+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 4); -+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+ -+// Heap related flags -+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); -+ -+// Ergonomics related flags -+define_pd_global(bool, NeverActAsServerClassMachine, false); -+ -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. 
-+ -+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..3cb4a4995 +index 00000000000..27770dc17aa --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -0,0 +1,1646 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11104,1270 +11720,1634 @@ index 000000000..3cb4a4995 + */ + +#include "precompiled.hpp" -+#include "opto/compile.hpp" -+#include "opto/node.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/subnode.hpp" ++#include "runtime/stubRoutines.hpp" + -+// processor dependent initialization for riscv ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif + -+extern void reg_mask_init(); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+void Compile::pd_compiler2_init() { -+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); -+ reg_mask_init(); -+} -diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -new file mode 100644 -index 000000000..881900892 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP -+#define CPU_RISCV_CODEBUFFER_RISCV_HPP ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+private: -+ void pd_initialize() {} ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+public: -+ void flush_bundle(bool start_new_bundle) {} ++ mv(result, -1); ++ mv(index, zr); + -+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -new file mode 100644 -index 000000000..0354a93a0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,154 @@ -+/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/compiledIC.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nmethod.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/safepoint.hpp" ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+// ---------------------------------------------------------------------------- ++ bind(LOOP4); ++ isL ? 
lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+#define __ _masm. -+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { -+ precond(cbuf.stubs()->start() != badAddress); -+ precond(cbuf.stubs()->end() != badAddress); -+ // Stub is fixed up when the corresponding call is converted from -+ // calling compiled code to calling interpreted code. -+ // mv xmethod, 0 -+ // jalr -4 # to self ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+ if (mark == NULL) { -+ mark = cbuf.insts_mark(); // Get mark within main instrs section. -+ } ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a stub. -+ MacroAssembler _masm(&cbuf); ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+ address base = __ start_a_stub(to_interp_stub_size()); -+ int offset = __ offset(); -+ if (base == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } -+ // static stub relocation stores the instruction address of the call -+ __ relocate(static_stub_Relocation::spec(mark)); ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ __ emit_static_call_stub(); ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); -+ __ end_a_stub(); -+ return base; -+} -+#undef __ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+int CompiledStaticCall::to_interp_stub_size() { -+ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr -+ return 12 * NativeInstruction::instruction_size; -+} ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); + -+int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count four instructions here (although -+ // there are only three) because we sometimes emit an alignment nop. -+ // Trampoline stubs are always word aligned. -+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; -+} ++ bind(MATCH7); ++ addi(index, index, 7); + -+// Relocation entries for call stub, compiled java to interpreter. -+int CompiledStaticCall::reloc_to_interp_stub() { -+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -+ address stub = find_stub(false /* is_aot */); -+ guarantee(stub != NULL, "stub not found"); -+ -+ if (TraceICs) { -+ ResourceMark rm; -+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", -+ p2i(instruction_address()), -+ callee->name_and_sig_as_C_string()); -+ } -+ -+ // Creation also verifies the object. 
-+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+#ifndef PRODUCT -+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); -+ -+ // read the value once -+ volatile intptr_t data = method_holder->data(); -+ assert(data == 0 || data == (intptr_t)callee(), -+ "a) MT-unsafe modification of inline cache"); -+ assert(data == 0 || jump->jump_destination() == entry, -+ "b) MT-unsafe modification of inline cache"); -+#endif -+ // Update stub. -+ method_holder->set_data((intptr_t)callee()); -+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); -+ ICache::invalidate_range(stub, to_interp_stub_size()); -+ // Update jump to call. -+ set_destination_mt_safe(stub); -+} ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); -+ // Reset stub. -+ address stub = static_stub->addr(); -+ assert(stub != NULL, "stub not found"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+ method_holder->set_data(0); -+} ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); + -+//----------------------------------------------------------------------------- -+// Non-product mode code -+#ifndef PRODUCT ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); + -+void CompiledDirectStaticCall::verify() { -+ // Verify call. -+ _call->verify(); -+ if (os::is_MP()) { -+ _call->verify_alignment(); ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); + } + -+ // Verify stub. -+ address stub = find_stub(false /* is_aot */); -+ assert(stub != NULL, "no stub found for static call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+ // Verify state. 
-+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); -+} ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp -new file mode 100644 -index 000000000..011e965ad ---- /dev/null -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+#ifndef CPU_RISCV_COPY_RISCV_HPP -+#define CPU_RISCV_COPY_RISCV_HPP ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+// Inline functions for memory copy and fill. 
++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } + -+// Contains inline asm implementations -+#include OS_CPU_HEADER_INLINE(copy) ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + ++ bind(NOMATCH); ++ mv(result, -1); + -+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { -+ julong* to = (julong*) tohw; -+ julong v = ((julong) value << 32) | value; -+ while (count-- > 0) { -+ *to++ = v; -+ } ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { -+ pd_fill_to_words(tohw, count, value); -+} ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { -+ (void)memset(to, value, count); -+} ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void C2_MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+static void pd_zero_to_words(HeapWord* tohw, size_t count) { -+ pd_fill_to_words(tohw, count, 0); -+} ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+static void pd_zero_to_bytes(void* to, size_t count) { -+ (void)memset(to, 0, count); -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+#endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -new file mode 100644 -index 000000000..31cee7103 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ bool isLL = ae == StrIntrinsicNode::LL; + -+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+// Nothing to do on riscv ++ BLOCK_COMMENT("string_indexof {"); + -+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -new file mode 100644 -index 000000000..e97b89327 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP -+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+ static int pd_instruction_alignment() { -+ return 1; -+ } ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. + -+ static const char* pd_cpu_opts() { -+ return ""; -+ } ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. 
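++ // result_tmp = haystack_len - needle_len is the last index at which a match
++ // can start; both the linear-scan fallback and the Boyer-Moore-Horspool code
++ // below use it to bound their outer loops.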
++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -new file mode 100644 -index 000000000..be6f1a67f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,683 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. 
Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+#include "precompiled.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/markOop.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/monitorChunk.hpp" -+#include "runtime/os.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#include "runtime/vframeArray.hpp" -+#endif ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+#ifdef ASSERT -+void RegisterMap::check_location_valid() { -+} -+#endif ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 7 : 3; + -+// Profiling/safepoint support ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; -+ address unextended_sp = (address)_unextended_sp; ++ sub(sp, sp, ASIZE); + -+ // consider stack guards when trying to determine "safe" stack pointers -+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ assert_cond(thread != NULL); -+ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+ // sp must be within the usable part of the stack (not in guards) -+ bool sp_safe = (addr_sp < thread->stack_base()) && -+ (addr_sp >= thread->stack_base() - usable_stack_size); ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + -+ if (!sp_safe) { -+ return false; ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); + } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+ // When we are running interpreted code the machine stack pointer, SP, is -+ // set low enough so that the Java expression stack can grow and shrink -+ // without ever exceeding the machine stack bounds. So, ESP >= SP. -+ -+ // When we call out of an interpreted method, SP is incremented so that -+ // the space between SP and ESP is removed. 
The SP saved in the callee's -+ // frame is the SP *before* this increment. So, when we walk a stack of -+ // interpreter frames the sender's SP saved in a frame might be less than -+ // the SP at the point of call. -+ -+ // So unextended sp must be within the stack but we need not to check -+ // that unextended sp >= sp -+ -+ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); + -+ if (!unextended_sp_safe) { -+ return false; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); + } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table + -+ // an fp must be within the stack and above (but not equal) sp -+ // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && -+ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); + -+ // We know sp/unextended_sp are safe only fp is questionable here ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } + -+ // If the current frame is known to the code cache then we can attempt to -+ // to construct the sender and do some validation of it. 
This goes a long way -+ // toward eliminating issues when we get in frame construction code ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } + -+ if (_cb != NULL) { ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + -+ // First check if frame is complete and tester is reliable -+ // Unfortunately we can only check frame complete for runtime stubs and nmethod -+ // other generic buffer blobs are more problematic so we just assume they are -+ // ok. adapter blobs never have a frame complete and are never ok. ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + -+ if (!_cb->is_frame_complete_at(_pc)) { -+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { -+ return false; -+ } -+ } ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); + -+ // Could just be some random pointer within the codeBlob -+ if (!_cb->code_contains(_pc)) { -+ return false; ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); + } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ // Entry frame checks -+ if (is_entry_frame()) { -+ // an entry frame must have a valid fp. 
-+ return fp_safe && is_entry_frame_valid(thread); -+ } ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+ intptr_t* sender_sp = NULL; -+ intptr_t* sender_unextended_sp = NULL; -+ address sender_pc = NULL; -+ intptr_t* saved_fp = NULL; ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); + -+ if (is_interpreted_frame()) { -+ // fp must be safe -+ if (!fp_safe) { -+ return false; -+ } ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); + -+ sender_pc = (address)this->fp()[return_addr_offset]; -+ // for interpreted frames, the value below is the sender "raw" sp, -+ // which can be different from the sender unextended sp (the sp seen -+ // by the sender) because of current frame local variables -+ sender_sp = (intptr_t*) addr_at(sender_sp_offset); -+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; -+ saved_fp = (intptr_t*) this->fp()[link_offset]; -+ } else { -+ // must be some sort of compiled/runtime frame -+ // fp does not have to be safe (although it could be check for c1?) ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); + -+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc -+ if (_cb->frame_size() <= 0) { -+ return false; -+ } ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+ sender_sp = _unextended_sp + _cb->frame_size(); -+ // Is sender_sp safe? 
-+ if ((address)sender_sp >= thread->stack_base()) { -+ return false; -+ } -+ sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp + frame::return_addr_offset); -+ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset); -+ } ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} + -+ // If the potential sender is the interpreter then we can do some more checking -+ if (Interpreter::contains(sender_pc)) { ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ // fp is always saved in a recognizable place in any code we generate. However -+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp -+ // is really a frame pointer. -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+ if (!saved_fp_safe) { -+ return false; -+ } ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+ return sender.is_interpreted_frame_valid(thread); -+ } ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? 
(load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+ // We must always be able to find a recognizable pc -+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); -+ if (sender_pc == NULL || sender_blob == NULL) { -+ return false; -+ } ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+ // Could be a zombie method -+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { -+ return false; -+ } ++ Register first = tmp3; + -+ // Could just be some random pointer within the codeBlob -+ if (!sender_blob->code_contains(sender_pc)) { -+ return false; -+ } ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+ // We should never be able to see an adapter if the current frame is something from code cache -+ if (sender_blob->is_adapter_blob()) { -+ return false; -+ } ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); + -+ // Could be the call_stub -+ if (StubRoutines::returns_to_call_stub(sender_pc)) { -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+ if (!saved_fp_safe) { -+ return false; -+ } ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ // Validate the JavaCallWrapper an entry frame must have -+ address jcw = (address)sender.entry_frame_call_wrapper(); ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+ return jcw_safe; ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); + } ++ } + -+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); -+ if (nm != NULL) { -+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || -+ nm->method()->is_method_handle_intrinsic()) { -+ return false; -+ } -+ } ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size -+ // because the return address counts against the callee's frame. 
-+ if (sender_blob->frame_size() <= 0) { -+ assert(!sender_blob->is_compiled(), "should count return address at least"); -+ return false; -+ } ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } + -+ // We should never be able to see anything here except an nmethod. If something in the -+ // code cache (current frame) is called by an entity within the code cache that entity -+ // should not be anything but the call stub (already covered), the interpreter (already covered) -+ // or an nmethod. -+ if (!sender_blob->is_compiled()) { -+ return false; ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); + } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ // Could put some more validation for the potential non-interpreted sender -+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... -+ -+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb -+ -+ // We've validated the potential sender that would be created -+ return true; ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); + } + -+ // Must be native-compiled frame. Since sender will try and use fp to find -+ // linkages it must be safe -+ if (!fp_safe) { -+ return false; -+ } ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) -+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+ return true; -+} ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+void frame::patch_pc(Thread* thread, address pc) { -+ address* pc_addr = &(((address*) sp())[-1]); -+ if (TracePcPatching) { -+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -+ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); -+ } -+ // Either the return address is the original one or we are going to -+ // patch in the same address that's already there. 
-+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); -+ *pc_addr = pc; -+ _cb = CodeCache::find_blob(pc); -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ assert(original_pc == _pc, "expected original PC to be stored before patching"); -+ _deopt_state = is_deoptimized; -+ // leave _pc as is -+ } else { -+ _deopt_state = not_deoptimized; -+ _pc = pc; ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); + } -+} + -+bool frame::is_interpreted_frame() const { -+ return Interpreter::contains(pc()); -+} ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+int frame::frame_size(RegisterMap* map) const { -+ frame sender = this->sender(map); -+ return sender.sp() - sp(); -+} ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+intptr_t* frame::entry_frame_argument_at(int offset) const { -+ // convert offset to index to deal with tsi -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ // Entry frame's arguments are always in relation to unextended_sp() -+ return &unextended_sp()[index]; -+} ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } + -+// sender_sp -+intptr_t* frame::interpreter_frame_sender_sp() const { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ return (intptr_t*) at(interpreter_frame_sender_sp_offset); -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); +} + ++// Compare strings. 
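++// result is zero when the strings are equal; otherwise it is the signed
++// difference between the first pair of characters that differ, or between
++// the two lengths (in characters) when the shorter string is a prefix of
++// the longer one.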
++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + -+// monitor elements ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+BasicObjectLock* frame::interpreter_frame_monitor_begin() const { -+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); -+} ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+BasicObjectLock* frame::interpreter_frame_monitor_end() const { -+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); -+ // make sure the pointer points inside the frame -+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); -+ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); -+ return result; -+} ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { -+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; -+} ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + -+// Used by template based interpreter deoptimization -+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { -+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; -+} ++ BLOCK_COMMENT("string_compare {"); + -+frame frame::sender_for_entry_frame(RegisterMap* map) const { -+ assert(map != NULL, "map must be set"); -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); -+ assert(!entry_frame_is_first(), "next Java fp must be non zero"); -+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); -+ // Since we are walking the stack now this nested anchor is obviously walkable -+ // even if it wasn't when it was stacked. -+ if (!jfa->walkable()) { -+ // Capture _last_Java_pc (if needed) and mark anchor walkable. -+ jfa->capture_last_Java_pc(); ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } -+ map->clear(); -+ assert(map->include_argument_oops(), "should be set by clear"); -+ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); -+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); -+ return fr; -+} -+ -+//------------------------------------------------------------------------------ -+// frame::verify_deopt_original_pc -+// -+// Verifies the calculated original PC of a deoptimization PC for the -+// given unextended SP. 
-+#ifdef ASSERT -+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { -+ frame fr; -+ -+ // This is ugly but it's better than to change {get,set}_original_pc -+ // to take an SP value as argument. And it's only a debugging -+ // method anyway. -+ fr._unextended_sp = unextended_sp; + -+ assert_cond(nm != NULL); -+ address original_pc = nm->get_original_pc(&fr); -+ assert(nm->insts_contains_inclusive(original_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+} -+#endif ++ // Compute the minimum of the string lengths and save the difference in result. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+//------------------------------------------------------------------------------ -+// frame::adjust_unextended_sp -+void frame::adjust_unextended_sp() { -+ // On riscv, sites calling method handle intrinsics and lambda forms are treated -+ // as any other call site. Therefore, no special action is needed when we are -+ // returning to any of these call sites. ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ if (_cb != NULL) { -+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); -+ if (sender_cm != NULL) { -+ // If the sender PC is a deoptimization point, get the original PC. -+ if (sender_cm->is_deopt_entry(_pc) || -+ sender_cm->is_deopt_mh_entry(_pc)) { -+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); + } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); + } -+ } -+} -+ -+//------------------------------------------------------------------------------ -+// frame::update_map_with_saved_link -+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { -+ // The interpreter and compiler(s) always save fp in a known -+ // location on entry. We must record where that location is -+ // so that if fp was live on callout from c2 we can find -+ // the saved copy no matter what it called. -+ -+ // Since the interpreter always saves fp if we record where it is then -+ // we don't have to always save fp on entry and exit to c2 compiled -+ // code, on entry will be enough. 
-+ assert(map != NULL, "map must be set"); -+ map->set_location(::fp->as_VMReg(), (address) link_addr); -+ // this is weird "H" ought to be at a higher address however the -+ // oopMaps seems to have the "H" regs at the same address and the -+ // vanilla register. -+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); -+} ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+//------------------------------------------------------------------------------ -+// frame::sender_for_interpreter_frame -+frame frame::sender_for_interpreter_frame(RegisterMap* map) const { -+ // SP is the raw SP from the sender after adapter or interpreter -+ // extension. -+ intptr_t* sender_sp = this->sender_sp(); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); + -+ // This is the sp before any possible extension (adapter/locals). -+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ // Find the first different characters in the longwords and ++ // compute their difference. 
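++ // tmp3 holds tmp1 ^ tmp2 and is non-zero on every path that reaches
++ // DIFFERENCE; its lowest set bit lies inside the first character that
++ // differs, so the shift/mask sequence below isolates that character in
++ // each operand before the signed subtraction.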
++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } + -+#ifdef COMPILER2 -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+#endif // COMPILER2 ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+ return frame(sender_sp, unextended_sp, link(), sender_pc()); -+} ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+//------------------------------------------------------------------------------ -+// frame::sender_for_compiled_frame -+frame frame::sender_for_compiled_frame(RegisterMap* map) const { -+ // we cannot rely upon the last fp having been saved to the thread -+ // in C2 code but it will have been pushed onto the stack. 
so we -+ // have to find it relative to the unextended sp ++ bind(DONE); + -+ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); -+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); -+ intptr_t* unextended_sp = l_sender_sp; ++ BLOCK_COMMENT("} string_compare"); ++} + -+ // the return_address is always the word on the stack -+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); ++void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); + -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); -+ if (_cb->oop_maps() != NULL) { -+ OopMapSet::update_register_map(this, map); -+ } ++ BLOCK_COMMENT("arrays_equals {"); + -+ // Since the prolog does the save and restore of FP there is no -+ // oopmap for it so we must fill in its location as if there was -+ // an oopmap entry since if our caller was compiled code there -+ // could be live jvm state in it. -+ update_map_with_saved_link(map, saved_fp_addr); -+ } ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); -+} ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+//------------------------------------------------------------------------------ -+// frame::sender -+frame frame::sender(RegisterMap* map) const { -+ // Default is we done have to follow them. 
The sender_for_xxx will -+ // update it accordingly -+ assert(map != NULL, "map must be set"); -+ map->set_include_argument_oops(false); ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+ if (is_entry_frame()) { -+ return sender_for_entry_frame(map); -+ } -+ if (is_interpreted_frame()) { -+ return sender_for_interpreter_frame(map); -+ } -+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+ // This test looks odd: why is it not is_compiled_frame() ? That's -+ // because stubs also have OOP maps. -+ if (_cb != NULL) { -+ return sender_for_compiled_frame(map); -+ } ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. -+ return frame(sender_sp(), link(), sender_pc()); ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); +} + -+bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -+ assert(is_interpreted_frame(), "Not an interpreted frame"); -+ // These are reasonable sanity checks -+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (fp() + interpreter_frame_initial_sp_offset < sp()) { -+ return false; -+ } -+ // These are hacks to keep us out of trouble. -+ // The problem with these is that they mask other problems -+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above -+ return false; -+ } ++// Compare Strings + -+ // do some validation of frame elements ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. 
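++// For example, an 11-byte Latin1 comparison loads bytes [0, 8) in the main
++// loop and bytes [3, 11) in the final overlapping load, so every byte is
++// covered using only two 8-byte loads per string.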
+ -+ // first the method ++void C2_MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+ Method* m = *interpreter_frame_method_addr(); ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+ // validate the method we'd find in this potential sender -+ if (!Method::is_valid_method(m)) { -+ return false; -+ } -+ // stack frames shouldn't be much larger than max_stack elements -+ // this test requires the use of unextended_sp which is the sp as seen by -+ // the current frame, and not sp which is the "raw" pc which could point -+ // further because of local variables of the callee method inserted after -+ // method arguments -+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { -+ return false; -+ } ++ BLOCK_COMMENT("string_equals {"); + -+ // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); -+ if (m->validate_bci_from_bcp(bcp) < 0) { -+ return false; -+ } ++ mv(result, false); + -+ // validate constantPoolCache* -+ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); -+ if (MetaspaceObj::is_valid(cp) == false) { -+ return false; -+ } -+ // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ if (locals > thread->stack_base() || locals < (address) fp()) { -+ return false; -+ } -+ // We'd have to be pretty unlucky to be mislead at this point -+ return true; -+} ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); + -+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ Method* method = interpreter_frame_method(); -+ BasicType type = method->result_type(); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); + -+ intptr_t* tos_addr = NULL; -+ if (method->is_native()) { -+ tos_addr = (intptr_t*)sp(); -+ if (type == T_FLOAT || type == T_DOUBLE) { -+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. -+ tos_addr += 2 * Interpreter::stackElementWords; -+ } -+ } else { -+ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); + } + -+ switch (type) { -+ case T_OBJECT : -+ case T_ARRAY : { -+ oop obj; -+ if (method->is_native()) { -+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); -+ } else { -+ oop* obj_p = (oop*)tos_addr; -+ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; -+ } -+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); -+ *oop_result = obj; -+ break; -+ } -+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; -+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; -+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; -+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; -+ case T_INT : value_result->i = *(jint*)tos_addr; break; -+ case T_LONG : value_result->j = *(jlong*)tos_addr; break; -+ case T_FLOAT : { -+ value_result->f = *(jfloat*)tos_addr; -+ break; ++ bind(TAIL03); ++ // 0-3 bytes left. ++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); + } -+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; -+ case T_VOID : /* Nothing to do */ break; -+ default : ShouldNotReachHere(); + } + -+ return type; ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); +} + ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, + -+intptr_t* frame::interpreter_frame_tos_at(jint offset) const { -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ return &interpreter_frame_tos_address()[index]; -+} ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; + -+#ifndef PRODUCT ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, + -+#define DESCRIBE_FP_OFFSET(name) \ -+ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, 
++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; + -+void frame::describe_pd(FrameValues& values, int frame_no) { -+ if (is_interpreted_frame()) { -+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_method); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); -+ DESCRIBE_FP_OFFSET(interpreter_frame_cache); -+ DESCRIBE_FP_OFFSET(interpreter_frame_locals); -+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); -+ } ++void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} -+#endif + -+intptr_t *frame::initial_deoptimization_info() { -+ // Not used on riscv, but we must return something. -+ return NULL; ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +} + -+intptr_t* frame::real_fp() const { -+ if (_cb != NULL) { -+ // use the frame size if valid -+ int size = _cb->frame_size(); -+ if (size > 0) { -+ return unextended_sp() + size; -+ } ++void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ // else rely on fp() -+ assert(!is_compiled_frame(), "unknown compiled frame size"); -+ return fp(); +} + -+#undef DESCRIBE_FP_OFFSET ++void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+#ifndef PRODUCT -+// This is a generic constructor which is only used by pns() in debug.cpp. -+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { -+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); ++void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); +} + -+void frame::pd_ps() {} -+#endif ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+void JavaFrameAnchor::make_walkable(JavaThread* thread) { -+ // last frame set? 
-+ if (last_Java_sp() == NULL) { return; } -+ // already walkable? -+ if (walkable()) { return; } -+ vmassert(Thread::current() == (Thread*)thread, "not current thread"); -+ vmassert(last_Java_sp() != NULL, "not called from Java code?"); -+ vmassert(last_Java_pc() == NULL, "already walkable"); -+ capture_last_Java_pc(); -+ vmassert(walkable(), "something went wrong"); -+} ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } + -+void JavaFrameAnchor::capture_last_Java_pc() { -+ vmassert(_last_Java_sp != NULL, "no last frame set"); -+ vmassert(_last_Java_pc == NULL, "already walkable"); -+ _last_Java_pc = (address)_last_Java_sp[-1]; ++ frflags(t0); ++ beqz(t0, Done); ++ ++ // In case of NaNs ++ is_double ? fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); ++ ++ bind(Done); +} -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -new file mode 100644 -index 000000000..7acabcbba ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,200 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_FRAME_RISCV_HPP -+#define CPU_RISCV_FRAME_RISCV_HPP ++void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { ++ Label loop; ++ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; + -+#include "runtime/synchronizer.hpp" ++ bind(loop); ++ vsetvli(tmp1, cnt, sew, Assembler::m2); ++ vlex_v(vr1, a1, sew); ++ vlex_v(vr2, a2, sew); ++ vmsne_vv(vrs, vr1, vr2); ++ vfirst_m(tmp2, vrs); ++ bgez(tmp2, DONE); ++ sub(cnt, cnt, tmp1); ++ if (!islatin) { ++ slli(tmp1, tmp1, 1); // get byte counts ++ } ++ add(a1, a1, tmp1); ++ add(a2, a2, tmp1); ++ bnez(cnt, loop); + -+// A frame represents a physical stack frame (an activation). Frames can be -+// C or Java frames, and the Java frames can be interpreted or compiled. 
-+// In contrast, vframes represent source-level activations, so that one physical frame -+// can correspond to multiple source level frames because of inlining. -+// A frame is comprised of {pc, fp, sp} -+// ------------------------------ Asm interpreter ---------------------------------------- -+// Layout of asm interpreter frame: -+// [expression stack ] * <- sp ++ mv(result, true); ++} + -+// [monitors[0] ] \ -+// ... | monitor block size = k -+// [monitors[k-1] ] / -+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset -+// [byte code index/pointr] = bcx() bcx_offset ++void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+// [pointer to locals ] = locals() locals_offset -+// [constant pool cache ] = cache() cache_offset ++ BLOCK_COMMENT("string_equals_v {"); + -+// [klass of method ] = mirror() mirror_offset -+// [padding ] ++ mv(result, false); + -+// [methodData ] = mdp() mdx_offset -+// [methodOop ] = method() method_offset ++ if (elem_size == 2) { ++ srli(cnt, cnt, 1); ++ } + -+// [last esp ] = last_sp() last_sp_offset -+// [old stack pointer ] (sender_sp) sender_sp_offset ++ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + -+// [old frame pointer ] -+// [return pc ] ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals_v"); ++} + -+// [last sp ] <- fp = link() -+// [oop temp ] (only for native calls) ++// used by C2 ClearArray patterns. ++// base: Address of a buffer to be zeroed ++// cnt: Count in HeapWords ++// ++// base, cnt, v0, v1 and t0 are clobbered. ++void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { ++ Label loop; + -+// [padding ] (to preserve machine SP alignment) -+// [locals and parameters ] -+// <- sender sp -+// ------------------------------ Asm interpreter ---------------------------------------- ++ // making zero words ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vxor_vv(v0, v0, v0); + -+// ------------------------------ C Frame ------------------------------------------------ -+// Stack: gcc with -fno-omit-frame-pointer -+// . -+// . -+// +-> . -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// +-> | ... | | -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// $fp --> | ... 
| | -+// +-----------------+ | -+// | return address | | -+// | previous fp ------+ -+// | saved registers | -+// $sp --> | local variables | -+// +-----------------+ -+// ------------------------------ C Frame ------------------------------------------------ ++ bind(loop); ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vse64_v(v0, base); ++ sub(cnt, cnt, t0); ++ shadd(base, t0, base, t0, 3); ++ bnez(cnt, loop); ++} + -+ public: -+ enum { -+ pc_return_offset = 0, -+ // All frames -+ link_offset = -2, -+ return_addr_offset = -1, -+ sender_sp_offset = 0, -+ // Interpreter frames -+ interpreter_frame_oop_temp_offset = 1, // for native calls only ++void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ interpreter_frame_sender_sp_offset = -3, -+ // outgoing sp before a call to an invoked method -+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, -+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, -+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, -+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, -+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, -+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, -+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, -+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, -+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ BLOCK_COMMENT("arrays_equals_v {"); + -+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, -+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ // if (a1 == a2), return true ++ mv(result, true); ++ beq(a1, a2, DONE); + -+ // Entry frames -+ // n.b. these values are determined by the layout defined in -+ // stubGenerator for the Java call stub -+ entry_frame_after_call_words = 34, -+ entry_frame_call_wrapper_offset = -10, ++ mv(result, false); ++ // if a1 == null or a2 == null, return false ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ // if (a1.length != a2.length), return false ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt1, cnt2, DONE); + -+ // we don't need a save area -+ arg_reg_save_area_bytes = 0 -+ }; ++ la(a1, Address(a1, base_offset)); ++ la(a2, Address(a2, base_offset)); + -+ intptr_t ptr_at(int offset) const { -+ return *ptr_at_addr(offset); -+ } ++ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + -+ void ptr_at_put(int offset, intptr_t value) { -+ *ptr_at_addr(offset) = value; -+ } ++ bind(DONE); + -+ private: -+ // an additional field beyond _sp and _pc: -+ intptr_t* _fp; // frame pointer -+ // The interpreter and adapters will extend the frame of the caller. -+ // Since oopMaps are based on the sp of the caller before extension -+ // we need to know that value. However in order to compute the address -+ // of the return address we need the real "raw" sp. Since sparc already -+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's -+ // original sp we use that convention. 
++ BLOCK_COMMENT("} arrays_equals_v"); ++} + -+ intptr_t* _unextended_sp; -+ void adjust_unextended_sp(); ++void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, ++ Register result, Register tmp1, Register tmp2, int encForm) { ++ Label DIFFERENCE, DONE, L, loop; ++ bool encLL = encForm == StrIntrinsicNode::LL; ++ bool encLU = encForm == StrIntrinsicNode::LU; ++ bool encUL = encForm == StrIntrinsicNode::UL; + -+ intptr_t* ptr_at_addr(int offset) const { -+ return (intptr_t*) addr_at(offset); ++ bool str1_isL = encLL || encLU; ++ bool str2_isL = encLL || encUL; ++ ++ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // for Lating strings, 1 byte for 1 character ++ // for UTF16 strings, 2 bytes for 1 character ++ if (!str1_isL) ++ sraiw(cnt1, cnt1, 1); ++ if (!str2_isL) ++ sraiw(cnt2, cnt2, 1); ++ ++ // if str1 == str2, return the difference ++ // save the minimum of the string lengths in cnt2. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ if (str1_isL == str2_isL) { // LL or UU ++ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); ++ j(DONE); ++ } else { // LU or UL ++ Register strL = encLU ? str1 : str2; ++ Register strU = encLU ? str2 : str1; ++ VectorRegister vstr1 = encLU ? v4 : v0; ++ VectorRegister vstr2 = encLU ? v0 : v4; ++ ++ bind(loop); ++ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); ++ vle8_v(vstr1, strL); ++ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); ++ vzext_vf2(vstr2, vstr1); ++ vle16_v(vstr1, strU); ++ vmsne_vv(v0, vstr2, vstr1); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, DIFFERENCE); ++ sub(cnt2, cnt2, tmp1); ++ add(strL, strL, tmp1); ++ shadd(strU, tmp1, strU, tmp1, 1); ++ bnez(cnt2, loop); ++ j(DONE); + } ++ bind(DIFFERENCE); ++ slli(tmp1, tmp2, 1); ++ add(str1, str1, str1_isL ? tmp2 : tmp1); ++ add(str2, str2, str2_isL ? tmp2 : tmp1); ++ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); ++ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); ++ sub(result, tmp1, tmp2); + -+#ifdef ASSERT -+ // Used in frame::sender_for_{interpreter,compiled}_frame -+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); -+#endif ++ bind(DONE); ++} + -+ public: -+ // Constructors ++void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { ++ Label loop; ++ assert_different_registers(src, dst, len, tmp, t0); + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ BLOCK_COMMENT("byte_array_inflate_v {"); ++ bind(loop); ++ vsetvli(tmp, len, Assembler::e8, Assembler::m2); ++ vle8_v(v2, src); ++ vsetvli(t0, len, Assembler::e16, Assembler::m4); ++ vzext_vf2(v0, v2); ++ vse16_v(v0, dst); ++ sub(len, len, tmp); ++ add(src, src, tmp); ++ shadd(dst, tmp, dst, tmp, 1); ++ bnez(len, loop); ++ BLOCK_COMMENT("} byte_array_inflate_v"); ++} + -+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); ++// Compress char[] array to byte[]. ++// result: the array length if every element in array can be encoded; 0, otherwise. ++void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label done; ++ encode_iso_array_v(src, dst, len, result, tmp); ++ beqz(len, done); ++ mv(result, zr); ++ bind(done); ++} + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); ++// result: the number of elements had been encoded. 
++void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label loop, DIFFERENCE, DONE; + -+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ BLOCK_COMMENT("encode_iso_array_v {"); ++ mv(result, 0); + -+ // accessors for the instance variables -+ // Note: not necessarily the real 'frame pointer' (see real_fp) -+ intptr_t* fp() const { return _fp; } ++ bind(loop); ++ mv(tmp, 0xff); ++ vsetvli(t0, len, Assembler::e16, Assembler::m2); ++ vle16_v(v2, src); ++ // if element > 0xff, stop ++ vmsgtu_vx(v1, v2, tmp); ++ vfirst_m(tmp, v1); ++ vmsbf_m(v0, v1); ++ // compress char to byte ++ vsetvli(t0, len, Assembler::e8); ++ vncvt_x_x_w(v1, v2, Assembler::v0_t); ++ vse8_v(v1, dst, Assembler::v0_t); + -+ inline address* sender_pc_addr() const; ++ bgez(tmp, DIFFERENCE); ++ add(result, result, t0); ++ add(dst, dst, t0); ++ sub(len, len, t0); ++ shadd(src, t0, src, t0, 1); ++ bnez(len, loop); ++ j(DONE); + -+ // expression stack tos if we are nested in a java call -+ intptr_t* interpreter_frame_last_sp() const; ++ bind(DIFFERENCE); ++ add(result, result, tmp); + -+ // helper to update a map with callee-saved RBP -+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ bind(DONE); ++ BLOCK_COMMENT("} encode_iso_array_v"); ++} + -+ // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* ptr_sp); ++void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { ++ Label LOOP, SET_RESULT, DONE; + -+ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ BLOCK_COMMENT("count_positives_v {"); ++ mv(result, zr); + -+#endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp ++ bind(LOOP); ++ vsetvli(t0, len, Assembler::e8, Assembler::m4); ++ vle8_v(v0, ary); ++ vmslt_vx(v0, v0, zr); ++ vfirst_m(tmp, v0); ++ bgez(tmp, SET_RESULT); ++ // if tmp == -1, all bytes are positive ++ add(result, result, t0); ++ ++ sub(len, len, t0); ++ add(ary, ary, t0); ++ bnez(len, LOOP); ++ j(DONE); ++ ++ // add remaining positive bytes count ++ bind(SET_RESULT); ++ add(result, result, tmp); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} count_positives_v"); ++} ++ ++void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ bool isL) { ++ mv(result, zr); ++ ++ Label loop, MATCH, DONE; ++ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; ++ bind(loop); ++ vsetvli(tmp1, cnt1, sew, Assembler::m4); ++ vlex_v(v0, str1, sew); ++ vmseq_vx(v0, v0, ch); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, MATCH); // if equal, return index ++ ++ add(result, result, tmp1); ++ sub(cnt1, cnt1, tmp1); ++ if (!isL) slli(tmp1, tmp1, 1); ++ add(str1, str1, tmp1); ++ bnez(cnt1, loop); ++ ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ add(result, result, tmp2); ++ ++ bind(DONE); ++} ++ ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++ ++ is_min ? 
vfmin_vv(dst, src1, src2) ++ : vfmax_vv(dst, src1, src2); ++ ++ vmfne_vv(v0, src1, src1); ++ vfadd_vv(dst, src1, src1, Assembler::v0_t); ++ vmfne_vv(v0, src2, src2); ++ vfadd_vv(dst, src2, src2, Assembler::v0_t); ++} ++ ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, ++ FloatRegister src1, VectorRegister src2, ++ VectorRegister tmp1, VectorRegister tmp2, ++ bool is_double, bool is_min) { ++ assert_different_registers(src2, tmp1, tmp2); ++ ++ Label L_done, L_NaN; ++ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++ vfmv_s_f(tmp2, src1); ++ ++ is_min ? vfredmin_vs(tmp1, src2, tmp2) ++ : vfredmax_vs(tmp1, src2, tmp2); ++ ++ fsflags(zr); ++ // Checking NaNs ++ vmflt_vf(tmp2, src2, src1); ++ frflags(t0); ++ bnez(t0, L_NaN); ++ j(L_done); ++ ++ bind(L_NaN); ++ vfmv_s_f(tmp2, src1); ++ vfredsum_vs(tmp1, src2, tmp2); ++ ++ bind(L_done); ++ vfmv_f_s(dst, tmp1); ++} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5bc6b430c +index 00000000000..c71df4c101b --- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,257 @@ ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -0,0 +1,193 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -12391,246 +13371,183 @@ index 000000000..5bc6b430c + * + */ + -+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP -+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP -+ -+#include "code/codeCache.hpp" -+#include "code/vmreg.inline.hpp" -+ -+// Inline functions for RISCV frames: -+ -+// Constructors: -+ -+inline frame::frame() { -+ _pc = NULL; -+ _sp = NULL; -+ _unextended_sp = NULL; -+ _fp = NULL; -+ _cb = NULL; -+ _deopt_state = unknown; -+} -+ -+static int spin; -+ -+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); -+ -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} -+ -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ init(ptr_sp, ptr_fp, pc); -+} -+ -+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = unextended_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); -+ -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} -+ -+inline frame::frame(intptr_t* ptr_sp, 
intptr_t* ptr_fp) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = (address)(ptr_sp[-1]); -+ -+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace -+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly -+ // unlucky the junk value could be to a zombied method and we'll die on the -+ // find_blob call. This is also why we can have no asserts on the validity -+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler -+ // -> pd_last_frame should use a specialized version of pd_last_frame which could -+ // call a specilaized frame constructor instead of this one. -+ // Then we could use the assert below. However this assert is of somewhat dubious -+ // value. ++#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP + -+ _cb = CodeCache::find_blob(_pc); -+ adjust_unextended_sp(); ++// C2_MacroAssembler contains high-level macros for C2 + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} ++ private: ++ void element_compare(Register r1, Register r2, ++ Register result, Register cnt, ++ Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, ++ VectorRegister vrs, ++ bool is_latin, Label& DONE); ++ public: + -+// Accessors ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+inline bool frame::equal(frame other) const { -+ bool ret = sp() == other.sp() && -+ unextended_sp() == other.unextended_sp() && -+ fp() == other.fp() && -+ pc() == other.pc(); -+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); -+ return ret; -+} ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+// Return unique id for this frame. The id must have a value where we can distinguish -+// identity and younger/older relationship. NULL represents an invalid (incomparable) -+// frame. 
-+inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+// Relationals on frames based ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); + -+// Return true if the frame is younger (more recent activation) than the frame represented by id -+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() < id ; } -+// Return true if the frame is older (less recent activation) than the frame represented by id -+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() > id ; } ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); + -+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); + -+inline intptr_t* frame::link_or_null() const { -+ intptr_t** ptr = (intptr_t **)addr_at(link_offset); -+ return os::is_readable_pointer(ptr) ? *ptr : NULL; -+} ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+// Return address -+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } -+inline address frame::sender_pc() const { return *sender_pc_addr(); } -+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+inline intptr_t** frame::interpreter_frame_locals_addr() const { -+ return (intptr_t**)addr_at(interpreter_frame_locals_offset); -+} ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); + -+inline intptr_t* frame::interpreter_frame_last_sp() const { -+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); -+} ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+inline intptr_t* frame::interpreter_frame_bcp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); -+} ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+inline intptr_t* frame::interpreter_frame_mdp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); -+} ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); + ++ void spill(Register r, bool is64, int offset) { ++ is64 ? 
sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } + -+// Constant pool cache ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } + -+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { -+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); -+} ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } + -+// Method ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } + -+inline Method** frame::interpreter_frame_method_addr() const { -+ return (Method**)addr_at(interpreter_frame_method_offset); -+} ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } + -+// Mirror ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } + -+inline oop* frame::interpreter_frame_mirror_addr() const { -+ return (oop*)addr_at(interpreter_frame_mirror_offset); -+} ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } + -+// top of expression stack -+inline intptr_t* frame::interpreter_frame_tos_address() const { -+ intptr_t* last_sp = interpreter_frame_last_sp(); -+ if (last_sp == NULL) { -+ return sp(); -+ } else { -+ // sp() may have been extended or shrunk by an adapter. At least -+ // check that we don't fall behind the legal region. -+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. -+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); -+ return last_sp; ++ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { ++ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); ++ unspill(v0, src_offset); ++ spill(v0, dst_offset); + } -+} + -+inline oop* frame::interpreter_frame_temp_oop_addr() const { -+ return (oop *)(fp() + interpreter_frame_oop_temp_offset); -+} ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); + -+inline int frame::interpreter_frame_monitor_size() { -+ return BasicObjectLock::size(); -+} ++ // intrinsic methods implemented by rvv instructions ++ void string_equals_v(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + ++ void arrays_equals_v(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+// expression stack -+// (the max_stack arguments are used by the GC; see class FrameClosure) ++ void string_compare_v(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register result, ++ Register tmp1, Register tmp2, ++ int encForm); + -+inline intptr_t* frame::interpreter_frame_expression_stack() const { -+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); -+ return monitor_end-1; -+} ++ void clear_array_v(Register base, Register cnt); + ++ void byte_array_inflate_v(Register src, Register dst, ++ Register len, Register tmp); + -+// Entry frames ++ void char_array_compress_v(Register src, Register dst, ++ Register len, Register result, ++ Register tmp); + -+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { -+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); -+} ++ void encode_iso_array_v(Register src, Register dst, ++ Register len, Register result, ++ Register tmp); + ++ 
void count_positives_v(Register ary, Register len, ++ Register result, Register tmp); + -+// Compiled frames -+inline oop frame::saved_oop_result(RegisterMap* map) const { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ return (*result_adr); -+ } else { -+ ShouldNotReachHere(); -+ return NULL; -+ } -+} ++ void string_indexof_char_v(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ bool isL); + -+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ *result_adr = obj; -+ } else { -+ ShouldNotReachHere(); -+ } -+} ++ void minmax_FD_v(VectorRegister dst, ++ VectorRegister src1, VectorRegister src2, ++ bool is_double, bool is_min); + -+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++ void reduce_minmax_FD_v(FloatRegister dst, ++ FloatRegister src1, VectorRegister src2, ++ VectorRegister tmp1, VectorRegister tmp2, ++ bool is_double, bool is_min); ++ ++#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 000000000..6f778956d +index 00000000000..53a41665f4b --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,479 @@ ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12653,468 +13570,73 @@ index 000000000..6f778956d + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/g1/g1BarrierSet.hpp" -+#include "gc/g1/g1BarrierSetAssembler.hpp" -+#include "gc/g1/g1BarrierSetRuntime.hpp" -+#include "gc/g1/g1CardTable.hpp" -+#include "gc/g1/g1ThreadLocalData.hpp" -+#include "gc/g1/heapRegion.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/g1/c1/g1BarrierSetC1.hpp" -+#endif ++#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C2_GLOBALS_RISCV_HPP + -+#define __ masm-> ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if (!dest_uninitialized) { -+ Label done; -+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. + -+ // Is marking active? 
-+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(t0, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(t0, in_progress); -+ } -+ __ beqz(t0, done); ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); ++define_pd_global(intx, CompileThreshold, 10000); + -+ __ push_reg(saved_regs, sp); -+ if (count == c_rarg0) { -+ if (addr == c_rarg1) { -+ // exactly backwards!! -+ __ mv(t0, c_rarg0); -+ __ mv(c_rarg0, c_rarg1); -+ __ mv(c_rarg1, t0); -+ } else { -+ __ mv(c_rarg1, count); -+ __ mv(c_rarg0, addr); -+ } -+ } else { -+ __ mv(c_rarg0, addr); -+ __ mv(c_rarg1, count); -+ } -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); -+ } -+ __ pop_reg(saved_regs, sp); -+ -+ __ bind(done); -+ } -+} -+ -+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ __ push_reg(saved_regs, sp); -+ assert_different_registers(start, count, tmp); -+ assert_different_registers(c_rarg0, count); -+ __ mv(c_rarg0, start); -+ __ mv(c_rarg1, count); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); -+ __ pop_reg(saved_regs, sp); -+} -+ -+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ -+ assert(thread == xthread, "must be"); -+ -+ Label done; -+ Label runtime; -+ -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); -+ -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); -+ -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); -+ -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } -+ -+ // Is the previous value null? -+ __ beqz(pre_val, done); -+ -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? 
-+ // If yes, goto runtime -+ -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr -+ -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); -+ -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } -+ -+ __ push_reg(saved, sp); -+ -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } -+ -+ __ pop_reg(saved, sp); -+ -+ __ bind(done); -+ -+} -+ -+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2) { -+ assert(thread == xthread, "must be"); -+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, -+ t0); -+ assert(store_addr != noreg && new_val != noreg && tmp != noreg && -+ tmp2 != noreg, "expecting a register"); -+ -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ -+ Label done; -+ Label runtime; -+ -+ // Does store cross heap regions? -+ -+ __ xorr(tmp, store_addr, new_val); -+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); -+ __ beqz(tmp, done); -+ -+ // crosses regions, storing NULL? -+ -+ __ beqz(new_val, done); -+ -+ // storing region crossing non-NULL, is card already dirty? -+ -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ const Register card_addr = tmp; -+ -+ __ srli(card_addr, store_addr, CardTable::card_shift); -+ -+ // get the address of the card -+ __ load_byte_map_base(tmp2); -+ __ add(card_addr, card_addr, tmp2); -+ __ lbu(tmp2, Address(card_addr)); -+ __ mv(t0, (int)G1CardTable::g1_young_card_val()); -+ __ beq(tmp2, t0, done); -+ -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); -+ -+ __ membar(MacroAssembler::StoreLoad); -+ -+ __ lbu(tmp2, Address(card_addr)); -+ __ beqz(tmp2, done); -+ -+ // storing a region crossing, non-NULL oop, card is clean. -+ // dirty card and log. 
-+ -+ __ sb(zr, Address(card_addr)); -+ -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); -+ -+ __ ld(tmp2, buffer); -+ __ add(t0, tmp2, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); -+ -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(store_addr, new_val); -+ __ push_reg(saved, sp); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_reg(saved, sp); -+ -+ __ bind(done); -+} -+ -+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ bool on_oop = type == T_OBJECT || type == T_ARRAY; -+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; -+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; -+ bool on_reference = on_weak || on_phantom; -+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ if (on_oop && on_reference) { -+ // RA is live. It must be saved around calls. -+ __ enter(); // barrier may call runtime -+ // Generate the G1 pre-barrier code to log the value of -+ // the referent field in an SATB buffer. -+ g1_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ leave(); -+ } -+} -+ -+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ __ mv(tmp3, dst.base()); -+ } else { -+ __ la(tmp3, dst); -+ } -+ -+ g1_write_barrier_pre(masm, -+ tmp3 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); -+ -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); -+ } else { -+ // G1 barrier needs uncompressed oop for region cross check. -+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); -+ g1_write_barrier_post(masm, -+ tmp3 /* store_adr */, -+ new_val /* new_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ tmp2 /* tmp2 */); -+ } -+} -+ -+#ifdef COMPILER1 -+ -+#undef __ -+#define __ ce->masm()-> -+ -+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. 
-+ __ bind(*stub->entry()); -+ -+ assert(stub->pre_val()->is_register(), "Precondition."); -+ -+ Register pre_val_reg = stub->pre_val()->as_register(); -+ -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), -+ false /* wide */, false /* unaligned */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} -+ -+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); -+ assert(stub->addr()->is_register(), "Precondition"); -+ assert(stub->new_val()->is_register(), "Precondition"); -+ Register new_val_reg = stub->new_val()->as_register(); -+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->addr()->as_pointer_register(), 0); -+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} -+ -+#undef __ -+ -+#define __ sasm-> -+ -+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_pre_barrier", false); -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ -+ // arg0 : previous value of memory -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; -+ -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); -+ -+ Label done; -+ Label runtime; -+ -+ // Is marking still active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); -+ -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); -+ -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); -+ -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ -+ __ epilogue(); -+} -+ -+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_post_barrier", false); -+ -+ // arg0 : store_address -+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ -+ Label done; -+ Label runtime; -+ -+ // At this point we know new_value is non-NULL and the new_value crosses regions. 
-+ // Must check to see if card is already dirty -+ const Register thread = xthread; -+ -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); -+ -+ const Register card_offset = t1; -+ // RA is free here, so we can use it to hold the byte_map_base. -+ const Register byte_map_base = ra; -+ -+ assert_different_registers(card_offset, byte_map_base, t0); -+ -+ __ load_parameter(0, card_offset); -+ __ srli(card_offset, card_offset, CardTable::card_shift); -+ __ load_byte_map_base(byte_map_base); -+ -+ // Convert card offset into an address in card_addr -+ Register card_addr = card_offset; -+ __ add(card_addr, byte_map_base, card_addr); -+ -+ __ lbu(t0, Address(card_addr, 0)); -+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); -+ __ beqz(t0, done); -+ -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); -+ -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t0, Address(card_addr, 0)); -+ __ beqz(t0, done); -+ -+ // storing region crossing non-NULL, card is clean. -+ // dirty card and log. -+ __ sb(zr, Address(card_addr, 0)); ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); + -+ // Reuse RA to hold buffer_addr -+ const Register buffer_addr = ra; ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, true); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); + -+ __ ld(buffer_addr, buffer); -+ __ add(t0, buffer_addr, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ __ epilogue(); -+} ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); + -+#undef __ ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. 
+ -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp ++#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp new file mode 100644 -index 000000000..7f85e002d +index 00000000000..cdbd69807be --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp -@@ -0,0 +1,78 @@ ++++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +@@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13138,68 +13660,27 @@ index 000000000..7f85e002d + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+ -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" -+#include "utilities/macros.hpp" -+ -+#ifdef COMPILER1 -+class LIR_Assembler; -+#endif -+class StubAssembler; -+class G1PreBarrierStub; -+class G1PostBarrierStub; -+ -+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs); -+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ -+ void g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ -+ void g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2); -+ -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ -+public: -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); -+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" + -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); -+#endif ++// processor dependent initialization for riscv + -+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+}; ++extern void reg_mask_init(); + -+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp new file mode 100644 -index 000000000..203b82744 +index 00000000000..a90d9fdc160 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,226 @@ ++++ 
b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +@@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13223,215 +13704,37 @@ index 000000000..203b82744 + */ + +#include "precompiled.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/thread.hpp" -+ -+#define __ masm-> ++#include "asm/macroAssembler.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#include "runtime/sharedRuntime.hpp" + -+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ // RA is live. It must be saved around calls. ++#define __ masm. ++void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ RuntimeAddress callback_addr(stub); + -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ bool is_not_null = (decorators & IS_NOT_NULL) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ if (in_heap) { -+ if (UseCompressedOops) { -+ __ lwu(dst, src); -+ if (is_not_null) { -+ __ decode_heap_oop_not_null(dst); -+ } else { -+ __ decode_heap_oop(dst); -+ } -+ } else { -+ __ ld(dst, src); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ ld(dst, src); -+ } -+ break; -+ } -+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; -+ case T_BYTE: __ load_signed_byte (dst, src); break; -+ case T_CHAR: __ load_unsigned_short(dst, src); break; -+ case T_SHORT: __ load_signed_short (dst, src); break; -+ case T_INT: __ lw (dst, src); break; -+ case T_LONG: __ ld (dst, src); break; -+ case T_ADDRESS: __ ld (dst, src); break; -+ case T_FLOAT: __ flw (f10, src); break; -+ case T_DOUBLE: __ fld (f10, src); break; -+ default: Unimplemented(); -+ } -+} -+ -+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ val = val == noreg ? 
zr : val; -+ if (in_heap) { -+ if (UseCompressedOops) { -+ assert(!dst.uses(val), "not enough registers"); -+ if (val != zr) { -+ __ encode_heap_oop(val); -+ } -+ __ sw(val, dst); -+ } else { -+ __ sd(val, dst); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ sd(val, dst); -+ } -+ break; -+ } -+ case T_BOOLEAN: -+ __ andi(val, val, 0x1); // boolean is true if LSB is 1 -+ __ sb(val, dst); -+ break; -+ case T_BYTE: __ sb(val, dst); break; -+ case T_CHAR: __ sh(val, dst); break; -+ case T_SHORT: __ sh(val, dst); break; -+ case T_INT: __ sw(val, dst); break; -+ case T_LONG: __ sd(val, dst); break; -+ case T_ADDRESS: __ sd(val, dst); break; -+ case T_FLOAT: __ fsw(f10, dst); break; -+ case T_DOUBLE: __ fsd(f10, dst); break; -+ default: Unimplemented(); -+ } -+ -+} -+ -+void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) { -+ __ beq(obj1, obj2, equal, is_far); -+} -+ -+void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) { -+ __ bne(obj1, obj2, nequal, is_far); -+} -+ -+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ // If mask changes we need to ensure that the inverse is still encodable as an immediate -+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); -+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); -+ __ ld(obj, Address(obj, 0)); // *obj -+} -+ -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. -+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ assert_different_registers(obj, tmp2); -+ assert_different_registers(obj, var_size_in_bytes); -+ Register end = tmp2; -+ -+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ __ add(end, obj, var_size_in_bytes); -+ } -+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); -+ __ bgtu(end, t0, slow_case, is_far); -+ -+ // update the tlab top pointer -+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); -+ -+ // recover var_size_in_bytes if necessary -+ if (var_size_in_bytes == end) { -+ __ sub(var_size_in_bytes, var_size_in_bytes, obj); -+ } -+} -+ -+// Defines obj, preserves var_size_in_bytes -+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ assert_different_registers(obj, var_size_in_bytes, tmp1); -+ if (!Universe::heap()->supports_inline_contig_alloc()) { -+ __ j(slow_case); -+ } else { -+ Register end = tmp1; -+ Label retry; -+ __ bind(retry); -+ -+ // Get the current end of the heap -+ ExternalAddress address_end((address) Universe::heap()->end_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t1, address_end, offset); -+ __ ld(t1, Address(t1, offset)); -+ } -+ -+ // Get the current top of the heap -+ ExternalAddress address_top((address) Universe::heap()->top_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t0, address_top, offset); -+ __ addi(t0, t0, offset); -+ __ lr_d(obj, t0, Assembler::aqrl); -+ } -+ -+ // Adjust it my the size of our new object -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ 
__ add(end, obj, var_size_in_bytes); -+ } -+ -+ // if end < obj then we wrapped around high memory -+ __ bltu(end, obj, slow_case, is_far); -+ -+ __ bgtu(end, t1, slow_case, is_far); -+ -+ // If heap_top hasn't been changed by some other thread, update it. -+ __ sc_d(t1, end, t0, Assembler::rl); -+ __ bnez(t1, retry); -+ -+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); -+ } -+} -+ -+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1) { -+ assert(tmp1->is_valid(), "need temp reg"); -+ -+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); -+ if (var_size_in_bytes->is_valid()) { -+ __ add(tmp1, tmp1, var_size_in_bytes); -+ } else { -+ __ add(tmp1, tmp1, con_size_in_bytes); -+ } -+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ __ bind(entry->_stub_label); ++ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); ++ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); ++ __ la(t0, safepoint_pc.target()); ++ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ far_jump(callback_addr); +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++#undef __ +diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp new file mode 100644 -index 000000000..964fc28be +index 00000000000..14a68b45026 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,75 @@ ++++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +@@ -0,0 +1,36 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13454,65 +13757,26 @@ index 000000000..964fc28be + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+ -+#include "asm/macroAssembler.hpp" -+#include "memory/allocation.hpp" -+#include "oops/access.hpp" ++#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP ++#define CPU_RISCV_CODEBUFFER_RISCV_HPP + -+class BarrierSetAssembler: public CHeapObj { +private: -+ void incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, int con_size_in_bytes, -+ Register t1 = noreg); ++ void pd_initialize() {} + +public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) {} -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register end, Register tmp, RegSet saved_regs) {} -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false); -+ virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ -+ virtual void tlab_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 -+ ); -+ -+ void eden_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 -+ ); -+ virtual void barrier_stubs_init() {} -+ virtual ~BarrierSetAssembler() {} -+}; ++ void flush_bundle(bool start_new_bundle) {} + -+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp new file mode 100644 -index 000000000..1720488fb +index 00000000000..75bc4be7840 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,120 @@ ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -0,0 +1,149 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13537,107 +13801,136 @@ index 000000000..1720488fb + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/cardTableBarrierSetAssembler.hpp" -+#include "interpreter/interp_masm.hpp" -+ -+#define __ masm-> ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" + ++// ---------------------------------------------------------------------------- + -+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { -+ assert_different_registers(obj, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // mv xmethod, 0 ++ // jalr -4 # to self + -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. ++ } + -+ __ srli(obj, obj, CardTable::card_shift); ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. 
++ MacroAssembler _masm(&cbuf); + -+ assert(CardTable::dirty_card_val() == 0, "must be"); ++ address base = __ start_a_stub(to_interp_stub_size()); ++ int offset = __ offset(); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ // static stub relocation stores the instruction address of the call ++ __ relocate(static_stub_Relocation::spec(mark)); + -+ __ load_byte_map_base(tmp); -+ __ add(tmp, obj, tmp); ++ __ emit_static_call_stub(); + -+ if (UseCondCardMark) { -+ Label L_already_dirty; -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t1, Address(tmp)); -+ __ beqz(t1, L_already_dirty); -+ __ sb(zr, Address(tmp)); -+ __ bind(L_already_dirty); -+ } else { -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } -+ __ sb(zr, Address(tmp)); -+ } ++ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); ++ __ end_a_stub(); ++ return base; +} ++#undef __ + -+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_different_registers(start, tmp); -+ assert_different_registers(count, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++int CompiledStaticCall::to_interp_stub_size() { ++ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; ++} + -+ Label L_loop, L_done; -+ const Register end = count; ++int CompiledStaticCall::to_trampoline_stub_size() { ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. ++ // Trampoline stubs are always word aligned. ++ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; ++} + -+ __ beqz(count, L_done); // zero count - nothing to do -+ // end = start + count << LogBytesPerHeapOop -+ __ shadd(end, count, start, count, LogBytesPerHeapOop); -+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++} + -+ __ srli(start, start, CardTable::card_shift); -+ __ srli(end, end, CardTable::card_shift); -+ __ sub(count, end, start); // number of bytes to copy ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); + -+ __ load_byte_map_base(tmp); -+ __ add(start, start, tmp); -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); + } + -+ __ bind(L_loop); -+ __ add(tmp, start, count); -+ __ sb(zr, Address(tmp)); -+ __ sub(count, count, 1); -+ __ bgez(count, L_loop); -+ __ bind(L_done); ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++#ifdef ASSERT ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ verify_mt_safe(callee, entry, method_holder, jump); ++#endif ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); ++ ICache::invalidate_range(stub, to_interp_stub_size()); ++ // Update jump to call. ++ set_destination_mt_safe(stub); +} + -+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool is_array = (decorators & IS_ARRAY) != 0; -+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; -+ bool precise = is_array || on_anonymous; ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++ method_holder->set_data(0); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ jump->set_jump_destination((address)-1); ++} + -+ bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); -+ if (needs_post_barrier) { -+ // flatten object address if needed -+ if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), tmp3); -+ } else { -+ __ la(tmp3, dst); -+ store_check(masm, tmp3, t0); -+ } -+ } ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ _call->verify_alignment(); ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp new file mode 100644 -index 000000000..a5b3f9fe8 +index 00000000000..bceadcc5dcc --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,43 @@ ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -0,0 +1,136 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -13661,92 +13954,125 @@ index 000000000..a5b3f9fe8 + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_COPY_RISCV_HPP ++#define CPU_RISCV_COPY_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include OS_CPU_HEADER(copy) + -+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} + -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} + -+}; ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} + -+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 000000000..b82275297 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,54 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} + -+#define __ masm-> ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} + -+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; + } +} + -+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, -+ RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); -+ } ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ shared_disjoint_words_atomic(from, to, count); +} + -+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ if (type == T_OBJECT || type == T_ARRAY) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); -+ } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); -+ } ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 000000000..df206cc87 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +new file mode 100644 +index 00000000000..b0e5560c906 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13770,44 +14096,46 @@ index 000000000..df206cc87 + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" ++static int pd_instruction_alignment() { ++ return 1; ++} + -+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other -+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected -+// accesses, which are overridden in the concrete BarrierSetAssembler. 
++static const char* pd_cpu_opts() { ++ return ""; ++} + -+class ModRefBarrierSetAssembler: public BarrierSetAssembler { -+protected: -+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) {} -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) {} ++// Returns address of n-th instruction preceding addr, ++// NULL if no preceding instruction can be found. ++// On riscv, we assume a constant instruction length. ++// It might be beneficial to check "is_readable" as we do on ppc and s390. ++static address find_prev_instr(address addr, int n_instr) { ++ return addr - Assembler::instruction_size * n_instr; ++} + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; ++// special-case instruction decoding. ++// There may be cases where the binutils disassembler doesn't do ++// the perfect job. In those cases, decode_instruction0 may kick in ++// and do it right. ++// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++} + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+}; ++// platform-specific instruction annotations (like value of loaded constants) ++static void annotate(address pc, outputStream* st) {} + -+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp new file mode 100644 -index 000000000..6657f1be0 +index 00000000000..5c700be9c91 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,124 @@ ++++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -13832,113 +14160,72 @@ index 000000000..6657f1be0 + */ + +#include "precompiled.hpp" -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" -+ -+#define __ masm->masm()-> -+ -+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { -+ Register addr = _addr->as_register_lo(); -+ Register newval = _new_value->as_register(); -+ Register cmpval = _cmp_value->as_register(); -+ Register tmp1 = _tmp1->as_register(); -+ Register tmp2 = _tmp2->as_register(); -+ Register result = result_opr()->as_register(); -+ -+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); -+ -+ if (UseCompressedOops) { -+ __ encode_heap_oop(tmp1, cmpval); -+ cmpval = tmp1; -+ __ encode_heap_oop(tmp2, newval); -+ newval = tmp2; -+ } ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" + -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, -+ /* release */ Assembler::rl, /* is_cae */ false, result); -+ if (UseBarriersForVolatile) { -+ // The membar here is necessary to prevent reordering between the -+ // release store in the CAS above and a subsequent volatile load. -+ // However for !UseBarriersForVolatile, C1 inserts a full barrier before -+ // volatile loads which means we don't need an additional barrier -+ // here (see LIRGenerator::volatile_field_load()). -+ __ membar(MacroAssembler::AnyAny); -+ } ++// Stubbed out, implement later ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; +} + -+#undef __ -+ -+#ifdef ASSERT -+#define __ gen->lir(__FILE__, __LINE__)-> -+#else -+#define __ gen->lir()-> -+#endif -+ -+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { -+ BasicType bt = access.type(); -+ if (access.is_oop()) { -+ LIRGenerator *gen = access.gen(); -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), -+ LIR_OprFact::illegalOpr /* pre_val */); -+ } -+ if (ShenandoahCASBarrier) { -+ cmp_value.load_item(); -+ new_value.load_item(); -+ -+ LIR_Opr tmp1 = gen->new_register(T_OBJECT); -+ LIR_Opr tmp2 = gen->new_register(T_OBJECT); -+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); -+ LIR_Opr result = gen->new_register(T_INT); -+ -+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); -+ return result; -+ } -+ } -+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; +} + -+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { -+ LIRGenerator* gen = access.gen(); -+ BasicType type = access.type(); -+ -+ LIR_Opr result = gen->new_register(type); -+ value.load_item(); -+ LIR_Opr value_opr = value.result(); -+ -+ if (access.is_oop()) { -+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); -+ } ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +new file mode 100644 +index 00000000000..3ac89752c27 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); -+ LIR_Opr tmp = gen->new_register(T_INT); -+ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP ++#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP + -+ if (access.is_oop()) { -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); -+ LIR_Opr tmp_opr = gen->new_register(type); -+ __ move(result, tmp_opr); -+ result = tmp_opr; -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, -+ result /* pre_val */); -+ } -+ } ++class ABIDescriptor {}; ++class BufferLayout {}; + -+ return result; -+} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp new file mode 100644 -index 000000000..1bc01e454 +index 00000000000..6e38960598a --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,743 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,697 @@ +/* -+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13962,731 +14249,684 @@ index 000000000..1bc01e454 + */ + +#include "precompiled.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahForwarding.hpp" -+#include "gc/shenandoah/shenandoahHeap.hpp" -+#include "gc/shenandoah/shenandoahHeapRegion.hpp" -+#include "gc/shenandoah/shenandoahRuntime.hpp" -+#include "gc/shenandoah/shenandoahThreadLocalData.hpp" -+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stackWatermarkSet.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" +#endif + -+#define __ masm-> ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif + -+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; + -+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || -+ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { -+ Label done; ++// Profiling/safepoint support + -+ // Avoid calling runtime if count == 0 -+ __ beqz(count, done); ++bool frame::safe_for_sender(JavaThread *thread) { ++ address addr_sp = (address)_sp; ++ address addr_fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; + -+ // Is GC active? -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ assert_different_registers(src, dst, count, t0); ++ // consider stack guards when trying to determine "safe" stack pointers ++ // sp must be within the usable part of the stack (not in guards) ++ if (!thread->is_in_usable_stack(addr_sp)) { ++ return false; ++ } + -+ __ lbu(t0, gc_state); -+ if (ShenandoahSATBBarrier && dest_uninitialized) { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, done); -+ } else { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); -+ __ beqz(t0, done); -+ } ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. 
+ -+ __ push_reg(saved_regs, sp); -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), -+ src, dst, count); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); -+ } -+ __ pop_reg(saved_regs, sp); -+ __ bind(done); -+ } -+ } -+} ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. + -+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ if (ShenandoahSATBBarrier) { -+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ if (!thread->is_in_full_stack_checked(unextended_sp)) { ++ return false; + } -+} + -+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ assert(thread == xthread, "must be"); ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && ++ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); + -+ Label done; -+ Label runtime; ++ // We know sp/unextended_sp are safe only fp is questionable here + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code + -+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ if (_cb != NULL) { + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. + -+ // Do we need to load the previous value? 
-+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) saved += RegSet::of(x10); -+ if (obj != noreg) saved += RegSet::of(obj); ++ sender_pc = (address)this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) + -+ __ push_reg(saved, sp); ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } + -+ // Calling the runtime using the regular call_VM_leaf mechanism generates -+ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) -+ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. -+ // -+ // If we care generating the pre-barrier without a frame (e.g. in the -+ // intrinsified Reference.get() routine) then ebp might be pointing to -+ // the caller frame and so this check will most likely fail at runtime. -+ // -+ // Expanding the call directly bypasses the generation of the check. -+ // So when we do not have have a full interpreter frame on the stack -+ // expand_call should be passed true. -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ return false; ++ } + -+ __ pop_reg(saved, sp); ++ sender_unextended_sp = sender_sp; ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); ++ } + -+ __ bind(done); -+} + -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { + -+ Label is_null; -+ __ beqz(dst, is_null); -+ resolve_forward_pointer_not_null(masm, dst, tmp); -+ __ bind(is_null); -+} ++ // fp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp ++ // is really a frame pointer. ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } + -+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely -+// passed in. -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); -+ // The below loads the mark word, checks if the lowest two bits are -+ // set, and if so, clear the lowest two bits and copy the result -+ // to dst. Otherwise it leaves dst alone. -+ // Implementing this is surprisingly awkward. I do it here by: -+ // - Inverting the mark word -+ // - Test lowest two bits == 0 -+ // - If so, set the lowest two bits -+ // - Invert the result back, and copy to dst -+ RegSet savedRegs = RegSet::of(t2); -+ bool borrow_reg = (tmp == noreg); -+ if (borrow_reg) { -+ // No free registers available. Make one useful. 
-+ tmp = t0; -+ if (tmp == dst) { -+ tmp = t1; ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); + } -+ savedRegs += RegSet::of(tmp); -+ } + -+ assert_different_registers(tmp, dst, t2); -+ __ push_reg(savedRegs, sp); ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } + -+ Label done; -+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); -+ __ bnez(t2, done); -+ __ ori(tmp, tmp, markOopDesc::marked_value); -+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ bind(done); ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } + -+ __ pop_reg(savedRegs, sp); -+} ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } + -+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, Address load_addr) { -+ assert(ShenandoahLoadRefBarrier, "Should be enabled"); -+ assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t1, t2); ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } + -+ Label done; -+ __ enter(); -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lbu(t1, gc_state); ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } + -+ // Check for heap stability -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, done); -+ -+ // use x11 for load address -+ Register result_dst = dst; -+ if (dst == x11) { -+ __ mv(t1, dst); -+ dst = t1; -+ } -+ -+ // Save x10 and x11, unless it is an output register -+ RegSet to_save = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(to_save, sp); -+ __ la(x11, load_addr); -+ __ mv(x10, dst); -+ -+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); -+ -+ __ mv(result_dst, x10); -+ __ pop_reg(to_save, sp); -+ -+ __ bind(done); -+ __ leave(); -+} -+ -+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { -+ if (ShenandoahIUBarrier) { -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); -+ __ pop_call_clobbered_registers(); -+ } -+} ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { -+ if (ShenandoahLoadRefBarrier) { -+ Label is_null; -+ __ beqz(dst, is_null); -+ load_reference_barrier_not_null(masm, dst, load_addr); -+ __ bind(is_null); -+ } -+} ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); + -+// -+// Arguments: -+// -+// Inputs: -+// src: oop location to load from, might be clobbered -+// -+// Output: -+// dst: oop loaded from src 
location -+// -+// Kill: -+// x30 (tmp reg) -+// -+// Alias: -+// dst: x30 (might use x30 as temporary output register to avoid clobbering src) -+// -+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ // 1: non-reference load, no additional barrier is needed -+ if (!is_reference_type(type)) { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + -+ // 2: load a reference from src location and apply LRB if needed -+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ guarantee(dst != x30 && src.base() != x30, "load_at need x30"); -+ bool ist5 = (dst == src.base()); -+ if (ist5) { -+ __ push_reg(RegSet::of(x30), sp); ++ return jcw_safe; + } -+ Register result_dst = dst; + -+ // Preserve src location for LRB -+ if (dst == src.base()) { -+ dst = x30; ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } + } -+ assert_different_registers(dst, src.base()); -+ -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ -+ load_reference_barrier(masm, dst, src); + -+ if (dst != result_dst) { -+ __ mv(result_dst, dst); -+ dst = result_dst; ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; + } + -+ if (ist5) { -+ __ pop_reg(RegSet::of(x30), sp); ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ if (!sender_blob->is_compiled()) { ++ return false; + } -+ } else { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ } + -+ // 3: apply keep-alive barrier if needed -+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { -+ __ enter(); -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ pop_call_clobbered_registers(); -+ __ leave(); -+ } -+} ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
+ -+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ bool on_oop = is_reference_type(type); -+ if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); -+ return; ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; + } + -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != tmp3) { -+ __ mv(tmp3, dst.base()); -+ } -+ } else { -+ __ la(tmp3, dst); ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ if (!fp_safe) { ++ return false; + } + -+ shenandoah_write_barrier_pre(masm, -+ tmp3 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg); ++ return true; ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is + } else { -+ iu_barrier(masm, val, tmp1); -+ // G1 barrier needs uncompressed oop for region cross check. -+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ _deopt_state = not_deoptimized; ++ _pc = pc; + } +} + -+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ Label done; -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); -+ -+ // Check for null. 
-+ __ beqz(obj, done); ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} + -+ assert(obj != t1, "need t1"); -+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); -+ __ lbu(t1, gc_state); ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} + -+ // Check for heap in evacuation phase -+ __ andi(t0, t1, ShenandoahHeap::EVACUATION); -+ __ bnez(t0, slowpath); ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} + -+ __ bind(done); ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + -+// Special Shenandoah CAS implementation that handles false negatives due -+// to concurrent evacuation. The service is more complex than a -+// traditional CAS operation because the CAS operation is intended to -+// succeed if the reference at addr exactly matches expected or if the -+// reference at addr holds a pointer to a from-space object that has -+// been relocated to the location named by expected. There are two -+// races that must be addressed: -+// a) A parallel thread may mutate the contents of addr so that it points -+// to a different object. In this case, the CAS operation should fail. -+// b) A parallel thread may heal the contents of addr, replacing a -+// from-space pointer held in addr with the to-space pointer -+// representing the new location of the object. -+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL -+// or it refers to an object that is not being evacuated out of -+// from-space, or it refers to the to-space version of an object that -+// is being evacuated out of from-space. -+// -+// By default, this operation implements sequential consistency and the -+// value held in the result register following execution of the -+// generated code sequence is 0 to indicate failure of CAS, non-zero -+// to indicate success. Arguments support variations on this theme: -+// -+// acquire: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If acquire, successful -+// execution has the side effect of assuring that memory -+// values updated by other threads and "released" will be -+// visible to any read operations perfomed by this thread -+// which follow this operation in program order. This is a -+// special optimization that should not be enabled by default. -+// release: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. 
If release, successful -+// completion of this operation has the side effect of -+// assuring that all writes to memory performed by this -+// thread that precede this operation in program order are -+// visible to all other threads that subsequently "acquire" -+// before reading the respective memory values. This is a -+// special optimization that should not be enabled by default. -+// is_cae: This turns CAS (compare and swap) into CAE (compare and -+// exchange). This HotSpot convention is that CAE makes -+// available to the caller the "failure witness", which is -+// the value that was stored in memory which did not match -+// the expected value. If is_cae, the result is the value -+// most recently fetched from addr rather than a boolean -+// success indicator. -+// -+// Clobbers t0, t1 -+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, -+ Register addr, -+ Register expected, -+ Register new_val, -+ Assembler::Aqrl acquire, -+ Assembler::Aqrl release, -+ bool is_cae, -+ Register result) { -+ bool is_narrow = UseCompressedOops; -+ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} + -+ assert_different_registers(addr, expected, t0, t1); -+ assert_different_registers(addr, new_val, t0, t1); + -+ Label retry, success, fail, done; ++// monitor elements + -+ __ bind(retry); ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} + -+ // Step1: Try to CAS. -+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; ++} + -+ // If success, then we are done. -+ __ beq(expected, t1, success); ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} + -+ // Step2: CAS failed, check the forwared pointer. -+ __ mv(t0, t1); ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; ++} + -+ if (is_narrow) { -+ __ decode_heap_oop(t0, t0); ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. 
++ jfa->capture_last_Java_pc(); + } -+ resolve_forward_pointer(masm, t0); -+ -+ __ encode_heap_oop(t0, t0); -+ -+ // Report failure when the forwarded oop was not expected. -+ __ bne(t0, expected, fail); -+ -+ // Step 3: CAS again using the forwarded oop. -+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); -+ -+ // Retry when failed. -+ __ bne(t0, t1, retry); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++} + -+ __ bind(success); -+ if (is_cae) { -+ __ mv(result, expected); -+ } else { -+ __ mv(result, 1); -+ } -+ __ j(done); ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} + -+ __ bind(fail); -+ if (is_cae) { -+ __ mv(result, t0); -+ } else { -+ __ mv(result, zr); -+ } ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} + -+ __ bind(done); ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; +} + -+#undef __ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; + -+#ifdef COMPILER1 ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; + -+#define __ ce->masm()-> ++ assert_cond(nm != NULL); ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains_inclusive(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif + -+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On riscv, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), -+ stub->info(), false /* wide */, false /* unaligned */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ assert(map != NULL, "map must be set"); ++ map->set_location(::fp->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + -+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, -+ ShenandoahLoadReferenceBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); + -+ Register obj = stub->obj()->as_register(); -+ Register res = stub->result()->as_register(); -+ Register addr = stub->addr()->as_pointer_register(); -+ Register tmp1 = stub->tmp1()->as_register(); -+ Register tmp2 = stub->tmp2()->as_register(); ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. ++ intptr_t* sender_sp = this->sender_sp(); + -+ assert(res == x10, "result must arrive in x10"); -+ assert_different_registers(tmp1, tmp2, t0); ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + -+ if (res != obj) { -+ __ mv(res, obj); ++#ifdef COMPILER2 ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } ++#endif // COMPILER2 + -+ // Check for null. -+ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} + -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t0, tmp2, tmp1); -+ __ lb(tmp2, Address(t0)); -+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); + -+ // Check if object is already forwarded. 
-+ Label slow_path; -+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp1, tmp1, -1); -+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); -+ __ bnez(t0, slow_path); ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ // we cannot rely upon the last fp having been saved to the thread ++ // in C2 code but it will have been pushed onto the stack. so we ++ // have to find it relative to the unextended sp + -+ // Decode forwarded object. -+ __ ori(tmp1, tmp1, markOopDesc::marked_value); -+ __ xori(res, tmp1, -1); -+ __ j(*stub->continuation()); ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = l_sender_sp; + -+ __ bind(slow_path); -+ ce->store_parameter(res, 0); -+ ce->store_parameter(addr, 1); -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); ++ // the return_address is always the word on the stack ++ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); + -+ __ j(*stub->continuation()); -+} ++ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); + -+#undef __ ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } + -+#define __ sasm-> ++ // Since the prolog does the save and restore of FP there is no ++ // oopmap for it so we must fill in its location as if there was ++ // an oopmap entry since if our caller was compiled code there ++ // could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } + -+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_pre_barrier", false); ++ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} + -+ // arg0 : previous value of memory ++//------------------------------------------------------------------------------ ++// frame::sender_raw ++frame frame::sender_raw(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ assert(map != NULL, "map must be set"); ++ map->set_include_argument_oops(false); + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++ if (is_entry_frame()) { ++ return sender_for_entry_frame(map); ++ } ++ if (is_interpreted_frame()) { ++ return sender_for_interpreter_frame(map); ++ } ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++ // This test looks odd: why is it not is_compiled_frame() ? That's ++ // because stubs also have OOP maps. ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } + -+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return frame(sender_sp(), link(), sender_pc()); ++} + -+ Label done; -+ Label runtime; ++frame frame::sender(RegisterMap* map) const { ++ frame result = sender_raw(map); + -+ // Is marking still active? -+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lb(tmp, gc_state); -+ __ andi(tmp, tmp, ShenandoahHeap::MARKING); -+ __ beqz(tmp, done); ++ if (map->process_frames()) { ++ StackWatermarkSet::on_iteration(map->thread(), result); ++ } + -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++ return result; ++} + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++ // do some validation of frame elements + -+ __ epilogue(); -+} ++ // first the method ++ Method* m = *interpreter_frame_method_addr(); ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) { ++ return false; ++ } + -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_load_reference_barrier", false); -+ // arg0 : object to be resolved ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use of unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } + -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, x10); -+ __ load_parameter(1, x11); -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ // validate bci/bcx ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); + -+ __ epilogue(); ++ // validate constantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) { ++ return false; ++ } ++ ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base() || locals < (address) fp()) { ++ return false; ++ } ++ ++ // We'd have to be pretty unlucky to be mislead at 
this point ++ return true; +} + -+#undef __ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); + -+#endif // COMPILER1 ++ intptr_t* tos_addr = NULL; ++ if (method->is_native()) { ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. ++ tos_addr += 2 * Interpreter::stackElementWords; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } + -+address ShenandoahBarrierSetAssembler::shenandoah_lrb() { -+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); -+ return _shenandoah_lrb; ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : { ++ value_result->f = *(jfloat*)tos_addr; ++ break; ++ } ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; +} + -+#define __ cgen->assembler()-> + -+// Shenandoah load reference barrier. -+// -+// Input: -+// x10: OOP to evacuate. Not null. -+// x11: load address -+// -+// Output: -+// x10: Pointer to evacuated OOP. -+// -+// Trash t0 t1 Preserve everything else. 
-+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { -+ __ align(6); -+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); -+ address start = __ pc(); ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} + -+ Label slow_path; -+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1, 0)); -+ __ andi(t0, t1, 1); -+ __ bnez(t0, slow_path); -+ __ ret(); ++#ifndef PRODUCT + -+ __ bind(slow_path); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) + -+ __ push_call_clobbered_registers(); ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif + -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++intptr_t *frame::initial_deoptimization_info() { ++ // Not used on riscv, but we must return something. ++ return NULL; ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); ++ // else rely on fp() ++ assert(!is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} + -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); ++#undef DESCRIBE_FP_OFFSET + -+ return start; ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { ++ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + -+#undef __ ++#endif + -+void ShenandoahBarrierSetAssembler::barrier_stubs_init() { -+ if (ShenandoahLoadRefBarrier) { -+ int stub_code_size = 2048; -+ ResourceMark rm; -+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); -+ CodeBuffer buf(bb); -+ StubCodeGenerator cgen(&buf); -+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); -+ } ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ // last frame set? ++ if (last_Java_sp() == NULL) { return; } ++ // already walkable? 
++ if (walkable()) { return; } ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); +} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp ++ ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; ++} +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 000000000..84bc55706 +index 00000000000..c06aaa9e391 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,92 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -0,0 +1,202 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -14710,277 +14950,193 @@ index 000000000..84bc55706 + * + */ + -+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_HPP ++#define CPU_RISCV_FRAME_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#ifdef COMPILER1 -+class LIR_Assembler; -+class ShenandoahPreBarrierStub; -+class ShenandoahLoadReferenceBarrierStub; -+class StubAssembler; -+#endif -+class StubCodeGenerator; ++#include "runtime/synchronizer.hpp" + -+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+public: -+ static address shenandoah_lrb(); ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp + -+ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); ++// [monitors[0] ] \ ++// ... 
| monitor block size = k ++// [monitors[k-1] ] / ++// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset ++// [byte code index/pointr] = bcx() bcx_offset + -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -+#endif ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset + -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++// [klass of method ] = mirror() mirror_offset ++// [padding ] + -+ virtual void barrier_stubs_init(); ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset + -+private: ++// [last esp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset + -+ static address _shenandoah_lrb; ++// [old frame pointer ] ++// [return pc ] + -+ void satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ void shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++// [last sp ] <- fp = link() ++// [oop temp ] (only for native calls) + -+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); -+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++// [padding ] (to preserve machine SP alignment) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- + -+ address generate_shenandoah_lrb(StubCodeGenerator* cgen); -+}; ++// ------------------------------ C Frame ------------------------------------------------ ++// Stack: gcc with -fno-omit-frame-pointer ++// . ++// . ++// +-> . ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// +-> | ... | | ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... 
| <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// $fp --> | ... | | ++// +-----------------+ | ++// | return address | | ++// | previous fp ------+ ++// | saved registers | ++// $sp --> | local variables | ++// +-----------------+ ++// ------------------------------ C Frame ------------------------------------------------ + -+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -new file mode 100644 -index 000000000..6e310697d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,188 @@ -+// -+// Copyright (c) 2018, Red Hat, Inc. All rights reserved. -+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ // Interpreter frames ++ interpreter_frame_oop_temp_offset = 1, // for native calls only + -+source_hpp %{ -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+%} ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + -+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + -+ effect(TEMP tmp, KILL cr); ++ // Entry frames ++ // n.b. 
these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 22, ++ entry_frame_call_wrapper_offset = -10, + -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" -+ %} ++ // we don't need a save area ++ arg_reg_save_area_bytes = 0 ++ }; + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } + -+ ins_pipe(pipe_slow); -+%} ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } + -+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. + -+ effect(TEMP tmp, KILL cr); ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); + -+ format %{ -+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" -+ %} ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); ++#endif + -+ ins_pipe(pipe_slow); -+%} ++ public: ++ // Constructors + -+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ effect(TEMP tmp, KILL cr); ++ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); + -+ format %{ -+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" -+ %} ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ ins_pipe(pipe_slow); -+%} ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } + -+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ inline address* sender_pc_addr() const; + -+ effect(TEMP tmp, KILL cr); ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; + -+ format %{ -+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" -+ %} ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ static jint interpreter_frame_expression_stack_direction() { return -1; } + -+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // returns the sending frame, without applying any barriers ++ frame sender_raw(RegisterMap* map) const; + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++#endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 000000000..96068e637 +index 00000000000..5ac1bf57f57 --- /dev/null -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -0,0 +1,248 @@ +/* -+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15003,310 +15159,236 @@ index 000000000..96068e637 + * + */ + -+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP ++#define CPU_RISCV_FRAME_RISCV_INLINE_HPP + -+const int StackAlignmentInBytes = 16; ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" + -+// Indicates whether the C calling conventions require that -+// 32-bit integer argument values are extended to 64 bits. -+const bool CCallingConventionRequiresIntsAsLongs = false; ++// Inline functions for RISCV frames: + -+#define DEOPTIMIZE_WHEN_PATCHING ++// Constructors: + -+#define SUPPORTS_NATIVE_CX8 ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} + -+#define SUPPORT_RESERVED_STACK_AREA ++static int spin; + -+#define THREAD_LOCAL_POLL ++inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -new file mode 100644 -index 000000000..b46661a8f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,120 @@ -+/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP -+#define CPU_RISCV_GLOBALS_RISCV_HPP ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ init(ptr_sp, ptr_fp, pc); ++} + -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" ++inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = unextended_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = (address)(ptr_sp[-1]); + -+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks -+define_pd_global(bool, TrapBasedNullChecks, false); -+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. + -+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(intx, CodeEntryAlignment, 64); -+define_pd_global(intx, OptoLoopAlignment, 16); -+define_pd_global(intx, InlineFrequencyCount, 100); ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); + -+#define DEFAULT_STACK_YELLOW_PAGES (2) -+#define DEFAULT_STACK_RED_PAGES (1) -+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the -+// stack if compiled for unix and LP64. To pass stack overflow tests we need -+// 20 shadow pages. 
-+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) -+#define DEFAULT_STACK_RESERVED_PAGES (1) ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES -+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES -+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -+#define MIN_STACK_RESERVED_PAGES (0) ++// Accessors + -+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); -+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); -+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); -+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() && ++ unextended_sp() == other.unextended_sp() && ++ fp() == other.fp() && ++ pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} + -+define_pd_global(bool, RewriteBytecodes, true); -+define_pd_global(bool, RewriteFrequentPairs, true); ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. ++inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+define_pd_global(bool, UseMembar, true); ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } + -+define_pd_global(bool, PreserveFramePointer, false); ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + -+// GC Ergo Flags -+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? 
*ptr : NULL; ++} + -+define_pd_global(uintx, TypeProfileLevel, 111); ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + -+define_pd_global(bool, CompactStrings, true); ++// Return address ++inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + -+// Clear short arrays bigger than one word in an arch-specific way -+define_pd_global(intx, InitArrayShortSize, BytesPerLong); ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} + -+define_pd_global(bool, ThreadLocalHandshakes, true); ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} + -+define_pd_global(intx, InlineSmallCode, 1000); ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} + -+#define ARCH_FLAGS(develop, \ -+ product, \ -+ diagnostic, \ -+ experimental, \ -+ notproduct, \ -+ range, \ -+ constraint, \ -+ writeable) \ -+ \ -+ product(bool, NearCpool, true, \ -+ "constant pool is close to instructions") \ -+ product(bool, UseBarriersForVolatile, false, \ -+ "Use memory barriers to implement volatile accesses") \ -+ product(bool, UseCRC32, false, \ -+ "Use CRC32 instructions for CRC32 computation") \ -+ product(bool, UseBlockZeroing, true, \ -+ "Use DC ZVA for block zeroing") \ -+ product(intx, BlockZeroingLowLimit, 256, \ -+ "Minimum size in bytes when block zeroing will be used") \ -+ range(1, max_jint) \ -+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ -+ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ -+ product(bool, UseConservativeFence, true, \ -+ "Extend i for r and o for w in the pred/succ flags of fence") \ -+ product(bool, AvoidUnalignedAccesses, true, \ -+ "Avoid generating unaligned memory accesses") \ -+ product(intx, EagerArrayCopyThreshold, 128, \ -+ "Threshod of array length by bytes to " \ -+ "trigger the eager array copy") \ -+ range(0, 65535) \ -+ experimental(bool, UseRVV, false, "Use RVV instructions") \ -+ experimental(bool, UseZba, false, "Use Zba instructions") \ -+ experimental(bool, UseZbb, false, "Use Zbb instructions") ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} + -+#endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -new file mode 100644 -index 000000000..980b2a81b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/icBuffer.hpp" -+#include "gc/shared/collectedHeap.inline.hpp" -+#include "interpreter/bytecodes.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" ++// Constant pool cache + -+int InlineCacheBuffer::ic_stub_code_size() { -+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size ) -+ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + -+#define __ masm-> ++// Method + -+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { -+ assert_cond(code_begin != NULL && entry_point != NULL); -+ ResourceMark rm; -+ CodeBuffer code(code_begin, ic_stub_code_size()); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ // Note: even though the code contains an embedded value, we do not need reloc info -+ // because -+ // (1) the value is old (i.e., doesn't matter for scavenges) -+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} + -+ address start = __ pc(); -+ Label l; -+ __ ld(t1, l); -+ __ far_jump(ExternalAddress(entry_point)); -+ __ align(wordSize); -+ __ bind(l); -+ __ emit_int64((intptr_t)cached_value); -+ // Only need to invalidate the 1st two instructions - not the whole ic stub -+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); -+ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); +} + -+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { -+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object -+ NativeJump* jump = nativeJump_at(move->next_instruction_address()); -+ return jump->jump_destination(); ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } +} + ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} + -+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { -+ // The word containing the cached value is at the end of this IC buffer -+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); -+ void* o = (void*)*p; -+ return o; ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); +} -diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp -new file mode 100644 -index 000000000..ed8022784 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#include "precompiled.hpp" -+#include "runtime/icache.hpp" -+#include "macroAssembler_riscv.hpp" + -+#define __ _masm-> ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) + -+static int icache_flush(address addr, int lines, int magic) { -+ // To make a store to instruction memory visible to all RISC-V harts, -+ // the writing hart has to execute a data FENCE before requesting that -+ // all remote RISC-V harts execute a FENCE.I -+ // -+ // No such-assurance is defined at the interface level of the builtin -+ // method, and so we should make sure it works. -+ __asm__ volatile("fence rw, rw" : : : "memory"); -+ -+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); -+ return magic; ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; +} + -+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + -+ address start = (address)icache_flush; ++// Entry frames + -+ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} + -+ // ICache::invalidate_range() contains explicit condition that the first -+ // call is invoked on the generated icache flush stub code range. 
-+ ICache::invalidate_range(start, 0); + -+ { -+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); -+ __ ret(); -+ } ++// Compiled frames ++PRAGMA_DIAG_PUSH ++PRAGMA_NONNULL_IGNORED ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + -+#undef __ -diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; ++} ++PRAGMA_DIAG_POP ++ ++#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..a503d3be3 +index 00000000000..1c46b3947d3 --- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -0,0 +1,484 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -15330,2259 +15412,2292 @@ index 000000000..a503d3be3 + * + */ + -+#ifndef CPU_RISCV_ICACHE_RISCV_HPP -+#define CPU_RISCV_ICACHE_RISCV_HPP -+ -+// Interface for updating the instruction cache. Whenever the VM -+// modifies code, part of the processor instruction cache potentially -+// has to be flushed. ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif + -+class ICache : public AbstractICache { -+public: -+ enum { -+ stub_size = 16, // Size of the icache flush stub in bytes -+ line_size = BytesPerWord, // conservative -+ log2_line_size = LogBytesPerWord // log2(line_size) -+ }; -+}; ++#define __ masm-> + -+#endif // CPU_RISCV_ICACHE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -new file mode 100644 -index 000000000..91deb0ae2 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1932 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if (!dest_uninitialized) { ++ Label done; ++ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interp_masm_riscv.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "logging/log.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.inline.hpp" ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(t0, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(t0, in_progress); ++ } ++ __ beqz(t0, done); + ++ __ push_reg(saved_regs, sp); ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! 
++ __ mv(t0, c_rarg0); ++ __ mv(c_rarg0, c_rarg1); ++ __ mv(c_rarg1, t0); ++ } else { ++ __ mv(c_rarg1, count); ++ __ mv(c_rarg0, addr); ++ } ++ } else { ++ __ mv(c_rarg0, addr); ++ __ mv(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop_reg(saved_regs, sp); + -+void InterpreterMacroAssembler::narrow(Register result) { -+ // Get method->_constMethod->_result_type -+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(t0, Address(t0, Method::const_offset())); -+ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ __ bind(done); ++ } ++} + -+ Label done, notBool, notByte, notChar; ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ __ push_reg(saved_regs, sp); ++ assert_different_registers(start, count, tmp); ++ assert_different_registers(c_rarg0, count); ++ __ mv(c_rarg0, start); ++ __ mv(c_rarg1, count); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop_reg(saved_regs, sp); ++} + -+ // common case first -+ mv(t1, T_INT); -+ beq(t0, t1, done); ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + -+ // mask integer result to narrower return type. -+ mv(t1, T_BOOLEAN); -+ bne(t0, t1, notBool); ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); + -+ andi(result, result, 0x1); -+ j(done); ++ Label done; ++ Label runtime; + -+ bind(notBool); -+ mv(t1, T_BYTE); -+ bne(t0, t1, notByte); -+ sign_extend(result, result, 8); -+ j(done); ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+ bind(notByte); -+ mv(t1, T_CHAR); -+ bne(t0, t1, notChar); -+ zero_extend(result, result, 16); -+ j(done); ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + -+ bind(notChar); -+ sign_extend(result, result, 16); ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); + -+ // Nothing to do for T_INT -+ bind(done); -+ addw(result, result, zr); -+} ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } + -+void InterpreterMacroAssembler::jump_to_entry(address entry) { -+ assert(entry != NULL, "Entry must have been generated by now"); -+ j(entry); -+} ++ // Is the previous value null? 
++ __ beqz(pre_val, done); + -+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { -+ if (JvmtiExport::can_pop_frame()) { -+ Label L; -+ // Initiate popframe handling only if it is not already being -+ // processed. If the flag has the popframe_processing bit set, -+ // it means that this code is called *during* popframe handling - we -+ // don't want to reenter. -+ // This method is only called just after the call into the vm in -+ // call_VM_base, so the arg registers are available. -+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); -+ andi(t0, t1, JavaThread::popframe_pending_bit); -+ beqz(t0, L); -+ andi(t0, t1, JavaThread::popframe_processing_bit); -+ bnez(t0, L); -+ // Call Interpreter::remove_activation_preserving_args_entry() to get the -+ // address of the same-named entrypoint in the generated interpreter code. -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); -+ jr(x10); -+ bind(L); -+ } -+} ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) + ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime + -+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { -+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); -+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); -+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); -+ switch (state) { -+ case atos: -+ ld(x10, oop_addr); -+ sd(zr, oop_addr); -+ verify_oop(x10); -+ break; -+ case ltos: -+ ld(x10, val_addr); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ lwu(x10, val_addr); -+ break; -+ case ftos: -+ flw(f10, val_addr); -+ break; -+ case dtos: -+ fld(f10, val_addr); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ // Clean up tos value in the thread object -+ mvw(t0, (int) ilgl); -+ sw(t0, tos_addr); -+ sw(zr, val_addr); -+} ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); + -+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { -+ if (JvmtiExport::can_force_early_return()) { -+ Label L; -+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(pre_val); ++ if (tosca_live) { saved += RegSet::of(x10); } ++ if (obj != noreg) { saved += RegSet::of(obj); } + -+ // Initiate earlyret handling only if it is not already being processed. -+ // If the flag has the earlyret_processing bit set, it means that this code -+ // is called *during* earlyret handling - we don't want to reenter. -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); -+ mv(t1, JvmtiThreadState::earlyret_pending); -+ bne(t0, t1, L); ++ __ push_reg(saved, sp); + -+ // Call Interpreter::remove_activation_early_entry() to get the address of the -+ // same-named entrypoint in the generated interpreter code. 
-+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); -+ jr(x10); -+ bind(L); ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } -+} + -+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { -+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); -+ lhu(reg, Address(xbcp, bcp_offset)); -+ revb_h(reg, reg); -+} ++ __ pop_reg(saved, sp); + -+void InterpreterMacroAssembler::get_dispatch() { -+ int32_t offset = 0; -+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); -+ addi(xdispatch, xdispatch, offset); -+} ++ __ bind(done); + -+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); -+ if (index_size == sizeof(u2)) { -+ load_unsigned_short(index, Address(xbcp, bcp_offset)); -+ } else if (index_size == sizeof(u4)) { -+ lwu(index, Address(xbcp, bcp_offset)); -+ // Check if the secondary index definition is still ~x, otherwise -+ // we have to change the following assembler code to calculate the -+ // plain index. -+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); -+ xori(index, index, -1); -+ addw(index, index, zr); -+ } else if (index_size == sizeof(u1)) { -+ load_unsigned_byte(index, Address(xbcp, bcp_offset)); -+ } else { -+ ShouldNotReachHere(); -+ } +} + -+// Return -+// Rindex: index into constant pool -+// Rcache: address of cache entry - ConstantPoolCache::base_offset() -+// -+// A caller must add ConstantPoolCache::base_offset() to Rcache to get -+// the true address of the cache entry. -+// -+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, -+ Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert_different_registers(cache, index); -+ assert_different_registers(cache, xcpool); -+ get_cache_index_at_bcp(index, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry -+ // riscv already has the cache in xcpool so there is no need to -+ // install it in cache. Instead we pre-add the indexed offset to -+ // xcpool and return it in cache. All clients of this method need to -+ // be modified accordingly. 
-+ shadd(cache, index, xcpool, cache, 5); -+} ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); ++ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, ++ t0); ++ assert(store_addr != noreg && new_val != noreg && tmp != noreg && ++ tmp2 != noreg, "expecting a register"); + ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + -+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, -+ Register index, -+ Register bytecode, -+ int byte_no, -+ int bcp_offset, -+ size_t index_size) { -+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); -+ // We use a 32-bit load here since the layout of 64-bit words on -+ // little-endian machines allow us that. -+ // n.b. unlike x86 cache already includes the index offset -+ la(bytecode, Address(cache, -+ ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::indices_offset())); -+ membar(MacroAssembler::AnyAny); -+ lwu(bytecode, bytecode); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ const int shift_count = (1 + byte_no) * BitsPerByte; -+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); -+ srli(bytecode, bytecode, XLEN - BitsPerByte); -+} ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, -+ Register tmp, -+ int bcp_offset, -+ size_t index_size) { -+ assert(cache != tmp, "must use different register"); -+ get_cache_index_at_bcp(tmp, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry index -+ // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); -+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ // skip past the header -+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); -+ // construct pointer to cache entry -+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); -+} ++ Label done; ++ Label runtime; + -+// Load object from cpool->resolved_references(index) -+void InterpreterMacroAssembler::load_resolved_reference_at_index( -+ Register result, Register index, Register tmp) { -+ assert_different_registers(result, index); ++ // Does store cross heap regions? 
+ -+ get_constant_pool(result); -+ // Load pointer for resolved_references[] objArray -+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); -+ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); -+ resolve_oop_handle(result, tmp); -+ // Add in the index -+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ shadd(result, index, result, index, LogBytesPerHeapOop); -+ load_heap_oop(result, Address(result, 0)); -+} ++ __ xorr(tmp, store_addr, new_val); ++ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(tmp, done); + -+void InterpreterMacroAssembler::load_resolved_klass_at_offset( -+ Register cpool, Register index, Register klass, Register temp) { -+ shadd(temp, index, cpool, temp, LogBytesPerWord); -+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index -+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses -+ shadd(klass, temp, klass, temp, LogBytesPerWord); -+ ld(klass, Address(klass, Array::base_offset_in_bytes())); -+} ++ // crosses regions, storing NULL? + -+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a -+// subtype of super_klass. -+// -+// Args: -+// x10: superklass -+// Rsub_klass: subklass -+// -+// Kills: -+// x12, x15 -+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, -+ Label& ok_is_subtype) { -+ assert(Rsub_klass != x10, "x10 holds superklass"); -+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); -+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); ++ __ beqz(new_val, done); + -+ // Profile the not-null value's klass. -+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 ++ // storing region crossing non-NULL, is card already dirty? + -+ // Do the check. -+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 ++ ExternalAddress cardtable((address) ct->byte_map_base()); ++ const Register card_addr = tmp; + -+ // Profile the failure of the check. -+ profile_typecheck_failed(x12); // blows x12 -+} ++ __ srli(card_addr, store_addr, CardTable::card_shift()); + -+// Java Expression Stack ++ // get the address of the card ++ __ load_byte_map_base(tmp2); ++ __ add(card_addr, card_addr, tmp2); ++ __ lbu(tmp2, Address(card_addr)); ++ __ mv(t0, (int)G1CardTable::g1_young_card_val()); ++ __ beq(tmp2, t0, done); + -+void InterpreterMacroAssembler::pop_ptr(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, wordSize); -+} ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + -+void InterpreterMacroAssembler::pop_i(Register r) { -+ lw(r, Address(esp, 0)); // lw do signed extended -+ addi(esp, esp, wordSize); -+} ++ __ membar(MacroAssembler::StoreLoad); + -+void InterpreterMacroAssembler::pop_l(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); -+} ++ __ lbu(tmp2, Address(card_addr)); ++ __ beqz(tmp2, done); + -+void InterpreterMacroAssembler::push_ptr(Register r) { -+ addi(esp, esp, -wordSize); -+ sd(r, Address(esp, 0)); -+} ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
+ -+void InterpreterMacroAssembler::push_i(Register r) { -+ addi(esp, esp, -wordSize); -+ addw(r, r, zr); // signed extended -+ sd(r, Address(esp, 0)); -+} ++ __ sb(zr, Address(card_addr)); + -+void InterpreterMacroAssembler::push_l(Register r) { -+ addi(esp, esp, -2 * wordSize); -+ sd(zr, Address(esp, wordSize)); -+ sd(r, Address(esp)); -+} ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); + -+void InterpreterMacroAssembler::pop_f(FloatRegister r) { -+ flw(r, esp, 0); -+ addi(esp, esp, wordSize); -+} ++ __ ld(tmp2, buffer); ++ __ add(t0, tmp2, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); + -+void InterpreterMacroAssembler::pop_d(FloatRegister r) { -+ fld(r, esp, 0); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(store_addr); ++ __ push_reg(saved, sp); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); +} + -+void InterpreterMacroAssembler::push_f(FloatRegister r) { -+ addi(esp, esp, -wordSize); -+ fsw(r, Address(esp, 0)); ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. 
++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } +} + -+void InterpreterMacroAssembler::push_d(FloatRegister r) { -+ addi(esp, esp, -2 * wordSize); -+ fsd(r, Address(esp, 0)); -+} -+ -+void InterpreterMacroAssembler::pop(TosState state) { -+ switch (state) { -+ case atos: -+ pop_ptr(); -+ verify_oop(x10); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ pop_i(); -+ break; -+ case ltos: -+ pop_l(); -+ break; -+ case ftos: -+ pop_f(); -+ break; -+ case dtos: -+ pop_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); + } -+} + -+void InterpreterMacroAssembler::push(TosState state) { -+ switch (state) { -+ case atos: -+ verify_oop(x10); -+ push_ptr(); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ push_i(); -+ break; -+ case ltos: -+ push_l(); -+ break; -+ case ftos: -+ push_f(); -+ break; -+ case dtos: -+ push_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++ g1_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ g1_write_barrier_post(masm, ++ x13 /* store_adr */, ++ new_val /* new_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); + } +} + -+// Helpers for swap and dup -+void InterpreterMacroAssembler::load_ptr(int n, Register val) { -+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); -+} ++#ifdef COMPILER1 + -+void InterpreterMacroAssembler::store_ptr(int n, Register val) { -+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); -+} ++#undef __ ++#define __ ce->masm()-> + -+void InterpreterMacroAssembler::load_float(Address src) { -+ flw(f10, src); -+} ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + -+void InterpreterMacroAssembler::load_double(Address src) { -+ fld(f10, src); -+} ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); + -+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { -+ // set sender sp -+ mv(x30, sp); -+ // record last_sp -+ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+} ++ assert(stub->pre_val()->is_register(), "Precondition."); + -+// Jump to from_interpreted entry of a call unless single stepping is possible -+// in this thread in which case we must call the i2i entry -+void InterpreterMacroAssembler::jump_from_interpreted(Register method) { -+ prepare_to_jump_from_interpreted(); -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(t0, run_compiled_code); -+ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ jr(t0); -+ bind(run_compiled_code); -+ } ++ Register pre_val_reg = stub->pre_val()->as_register(); + -+ ld(t0, Address(method, Method::from_interpreted_offset())); -+ jr(t0); ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. amd64 does not do this. -+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition"); ++ assert(stub->new_val()->is_register(), "Precondition"); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { -+ dispatch_next(state, step); -+} ++#undef __ + -+void InterpreterMacroAssembler::dispatch_base(TosState state, -+ address* table, -+ bool verifyoop, -+ bool generate_poll, -+ Register Rs) { -+ // Pay attention to the argument Rs, which is acquiesce in t0. 
-+ if (VerifyActivationFrameSize) { -+ Unimplemented(); -+ } -+ if (verifyoop && state == atos) { -+ verify_oop(x10); -+ } ++#define __ sasm-> + -+ Label safepoint; -+ address* const safepoint_table = Interpreter::safept_table(state); -+ bool needs_thread_local_poll = generate_poll && -+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); + -+ if (needs_thread_local_poll) { -+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); -+ bnez(t1, safepoint); -+ } -+ if (table == Interpreter::dispatch_table(state)) { -+ mv(t1, Interpreter::distance_from_dispatch_table(state)); -+ add(t1, Rs, t1); -+ shadd(t1, t1, xdispatch, t1, 3); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ // arg0 : previous value of memory ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); + } else { -+ mv(t1, (address)table); -+ shadd(t1, Rs, t1, Rs, 3); ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); + } -+ ld(t1, Address(t1)); -+ jr(t1); ++ __ beqz(tmp, done); + -+ if (needs_thread_local_poll) { -+ bind(safepoint); -+ la(t1, ExternalAddress((address)safepoint_table)); -+ shadd(t1, Rs, t1, Rs, 3); -+ ld(t1, Address(t1)); -+ jr(t1); -+ } -+} ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); + -+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); -+} ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); + -+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), Rs); -+} ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); + -+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++ __ epilogue(); +} + -+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode -+ load_unsigned_byte(t0, Address(xbcp, step)); -+ add(xbcp, xbcp, step); -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); -+} ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); + -+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { -+ // load current bytecode -+ lbu(t0, Address(xbcp, 0)); -+ dispatch_base(state, table); -+} ++ // arg0 : store_address ++ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp + -+// remove activation -+// -+// Unlock the receiver if this is a synchronized method. -+// Unlock any Java monitors from syncronized blocks. -+// Remove the activation from the stack. -+// -+// If there are locked Java monitors -+// If throw_monitor_exception -+// throws IllegalMonitorStateException -+// Else if install_monitor_exception -+// installs IllegalMonitorStateException -+// Else -+// no error processing -+void InterpreterMacroAssembler::remove_activation( -+ TosState state, -+ bool throw_monitor_exception, -+ bool install_monitor_exception, -+ bool notify_jvmdi) { -+ // Note: Registers x13 may be in use for the -+ // result check if synchronized method -+ Label unlocked, unlock, no_unlock; ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+ // get the value of _do_not_unlock_if_synchronized into x13 -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ lbu(x13, do_not_unlock_if_synchronized); -+ sb(zr, do_not_unlock_if_synchronized); // reset the flag ++ Label done; ++ Label runtime; + -+ // get method access flags -+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(x12, Address(x11, Method::access_flags_offset())); -+ andi(t0, x12, JVM_ACC_SYNCHRONIZED); -+ beqz(t0, unlocked); ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ const Register thread = xthread; + -+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag -+ // is set. 
-+ bnez(x13, no_unlock); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + -+ // unlock monitor -+ push(state); // save result ++ const Register card_offset = t1; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = ra; + -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object has -+ // not been unlocked by an explicit monitorexit bytecode. -+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * -+ wordSize - (int) sizeof(BasicObjectLock)); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ la(c_rarg1, monitor); // address of first monitor ++ assert_different_registers(card_offset, byte_map_base, t0); + -+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ bnez(x10, unlock); ++ __ load_parameter(0, card_offset); ++ __ srli(card_offset, card_offset, CardTable::card_shift()); ++ __ load_byte_map_base(byte_map_base); + -+ pop(state); -+ if (throw_monitor_exception) { -+ // Entry already unlocked, need to throw exception -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ should_not_reach_here(); -+ } else { -+ // Monitor already unlocked during a stack unroll. If requested, -+ // install an illegal_monitor_state_exception. Continue with -+ // stack unrolling. -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::new_illegal_monitor_state_exception)); -+ } -+ j(unlocked); -+ } ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add(card_addr, byte_map_base, card_addr); + -+ bind(unlock); -+ unlock_object(c_rarg1); -+ pop(state); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); ++ __ beqz(t0, done); + -+ // Check that for block-structured locking (i.e., that all locked -+ // objects has been unlocked) -+ bind(unlocked); ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + -+ // x10: Might contain return value ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ beqz(t0, done); + -+ // Check that all monitors are unlocked -+ { -+ Label loop, exception, entry, restart; -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
++ __ sb(zr, Address(card_addr, 0)); + -+ bind(restart); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ ld(c_rarg1, monitor_block_top); // points to current entry, starting -+ // with top-most entry -+ la(x9, monitor_block_bot); // points to word before bottom of -+ // monitor block ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); + -+ j(entry); ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = ra; + -+ // Entry already locked, need to throw exception -+ bind(exception); ++ __ ld(buffer_addr, buffer); ++ __ add(t0, buffer_addr, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); + -+ if (throw_monitor_exception) { -+ // Throw exception -+ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime:: -+ throw_illegal_monitor_state_exception)); ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); ++} + -+ should_not_reach_here(); -+ } else { -+ // Stack unrolling. Unlock object and install illegal_monitor_exception. -+ // Unlock does not block, so don't have to worry about the frame. -+ // We don't have to preserve c_rarg1 since we are going to throw an exception. ++#undef __ + -+ push(state); -+ unlock_object(c_rarg1); -+ pop(state); ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..37bc183f39c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +@@ -0,0 +1,78 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ new_illegal_monitor_state_exception)); -+ } ++#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP + -+ j(restart); -+ } ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" + -+ bind(loop); -+ // check if current entry is used -+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); -+ ld(t0, Address(t0, 0)); -+ bnez(t0, exception); ++#ifdef COMPILER1 ++class LIR_Assembler; ++#endif ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; + -+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry -+ bind(entry); -+ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry -+ } ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); + -+ bind(no_unlock); ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+ // jvmti support -+ if (notify_jvmdi) { -+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); + -+ } else { -+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA -+ } ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // remove activation -+ // get sender esp -+ ld(t1, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ if (StackReservedPages > 0) { -+ // testing if reserved zone needs to be re-enabled -+ Label no_reserved_zone_enabling; ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ ble(t1, t0, no_reserved_zone_enabling); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif + -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_delayed_StackOverflowError)); -+ should_not_reach_here(); ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; + -+ bind(no_reserved_zone_enabling); -+ } -+ -+ // restore sender esp -+ mv(esp, t1); -+ // remove frame anchor -+ leave(); -+ // If we're returning to interpreted code we will shortly be -+ // adjusting SP to allow some space for ESP. If we're returning to -+ // compiled code the saved sender SP was saved in sender_sp, so this -+ // restores it. 
-+ andi(sp, esp, -16); -+} -+ -+// Lock object -+// -+// Args: -+// c_rarg1: BasicObjectLock to be used for locking -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::lock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register tmp = c_rarg2; -+ const Register obj_reg = c_rarg3; // Will contain the oop ++#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +new file mode 100644 +index 00000000000..8735fd014ff +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); -+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); -+ const int mark_offset = lock_offset + -+ BasicLock::displaced_header_offset_in_bytes(); ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + -+ Label slow_case; ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + -+ // Load object pointer into obj_reg c_rarg3 -+ ld(obj_reg, Address(lock_reg, obj_offset)); ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..3c115a2ea02 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -0,0 +1,302 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (UseBiasedLocking) { -+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); -+ } ++#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "memory/universe.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" + -+ // Load (object->mark() | 1) into swap_reg -+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ ori(swap_reg, t0, 1); ++#define __ masm-> + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ sd(swap_reg, Address(lock_reg, mark_offset)); ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); + -+ assert(lock_offset == 0, -+ "displached header must be first word in BasicObjectLock"); ++ // RA is live. It must be saved around calls. 
+ -+ if (PrintBiasedLockingStatistics) { -+ Label fail, fast; -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); -+ bind(fast); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ j(done); -+ bind(fail); -+ } else { -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ __ ld(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld(dst, src); ++ } ++ break; + } ++ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; ++ case T_BYTE: __ load_signed_byte (dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld (dst, src); break; ++ case T_FLOAT: __ flw (f10, src); break; ++ case T_DOUBLE: __ fld (f10, src); break; ++ default: Unimplemented(); ++ } ++} + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 7) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (7 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 3 bits clear. -+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg -+ sub(swap_reg, swap_reg, sp); -+ mv(t0, (int64_t)(7 - os::vm_page_size())); -+ andr(swap_reg, swap_reg, t0); -+ -+ // Save the test result, for recursive case, the result is zero -+ sd(swap_reg, Address(lock_reg, mark_offset)); -+ -+ if (PrintBiasedLockingStatistics) { -+ bnez(swap_reg, slow_case); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ val = val == noreg ? 
zr : val; ++ if (in_heap) { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (val != zr) { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else { ++ __ sd(val, dst); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ sd(val, dst); ++ } ++ break; + } -+ beqz(swap_reg, done); -+ -+ bind(slow_case); -+ -+ // Call the runtime routine for slow case -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ -+ bind(done); ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: __ sb(val, dst); break; ++ case T_CHAR: __ sh(val, dst); break; ++ case T_SHORT: __ sh(val, dst); break; ++ case T_INT: __ sw(val, dst); break; ++ case T_LONG: __ sd(val, dst); break; ++ case T_ADDRESS: __ sd(val, dst); break; ++ case T_FLOAT: __ fsw(f10, dst); break; ++ case T_DOUBLE: __ fsd(f10, dst); break; ++ default: Unimplemented(); + } ++ +} + ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); ++ // If mask changes we need to ensure that the inverse is still encodable as an immediate ++ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); ++ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); ++ __ ld(obj, Address(obj, 0)); // *obj ++} + -+// Unlocks an object. Used in monitorexit bytecode and -+// remove_activation. Throws an IllegalMonitorException if object is -+// not locked by current thread. -+// -+// Args: -+// c_rarg1: BasicObjectLock for lock -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::unlock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = tmp2; + -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); ++ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); + } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register header_reg = c_rarg2; // Will contain the old oopMark -+ const Register obj_reg = c_rarg3; // Will contain the oop -+ -+ save_bcp(); // Save in case of exception ++ __ add(end, obj, var_size_in_bytes); ++ } ++ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); ++ __ bgtu(end, t0, slow_case, is_far); + -+ // Convert from BasicObjectLock structure to object and BasicLock -+ // structure Store the BasicLock address into x10 -+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++ // update the tlab top pointer ++ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + -+ // Load oop into obj_reg(c_rarg3) -+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++} + -+ // Free entry -+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, var_size_in_bytes, tmp1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ j(slow_case); ++ } else { ++ Register end = tmp1; ++ Label retry; ++ __ bind(retry); + -+ if (UseBiasedLocking) { -+ biased_locking_exit(obj_reg, header_reg, done); ++ // Get the current end of the heap ++ ExternalAddress address_end((address) Universe::heap()->end_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t1, address_end, offset); ++ __ ld(t1, Address(t1, offset)); + } + -+ // Load the old header from BasicLock structure -+ ld(header_reg, Address(swap_reg, -+ BasicLock::displaced_header_offset_in_bytes())); ++ // Get the current top of the heap ++ ExternalAddress address_top((address) Universe::heap()->top_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t0, address_top, offset); ++ __ addi(t0, t0, offset); ++ __ lr_d(obj, t0, Assembler::aqrl); ++ } + -+ // Test for recursion -+ beqz(header_reg, done); ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); ++ } + -+ // Atomic swap back the old header -+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ // if end < obj then we wrapped around high memory ++ __ bltu(end, obj, slow_case, is_far); + -+ // Call the runtime routine for slow case. 
-+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); ++ __ bgtu(end, t1, slow_case, is_far); + -+ bind(done); ++ // If heap_top hasn't been changed by some other thread, update it. ++ __ sc_d(t1, end, t0, Assembler::rl); ++ __ bnez(t1, retry); + -+ restore_bcp(); ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} + ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1) { ++ assert_cond(masm != NULL); ++ assert(tmp1->is_valid(), "need temp reg"); + -+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, -+ Label& zero_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ beqz(mdp, zero_continue); ++ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ add(tmp1, tmp1, var_size_in_bytes); ++ } else { ++ __ add(tmp1, tmp1, con_size_in_bytes); ++ } ++ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); +} + -+// Set the method data pointer for the current bcp. -+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Label set_mdp; -+ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 ++void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + -+ // Test MDO to avoid the call if it is NULL. -+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ beqz(x10, set_mdp); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); -+ // x10: mdi -+ // mdo is guaranteed to be non-zero here, we checked for it before the call. -+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); -+ add(x10, x11, x10); -+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ bind(set_mdp); -+ pop_reg(RegSet::of(x10, x11), sp); -+} ++ if (bs_nm == NULL) { ++ return; ++ } + -+void InterpreterMacroAssembler::verify_method_data_pointer() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+#ifdef ASSERT -+ Label verify_continue; -+ add(sp, sp, -4 * wordSize); -+ sd(x10, Address(sp, 0)); -+ sd(x11, Address(sp, wordSize)); -+ sd(x12, Address(sp, 2 * wordSize)); -+ sd(x13, Address(sp, 3 * wordSize)); -+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue -+ get_method(x11); ++ // RISCV atomic operations require that the memory address be naturally aligned. ++ __ align(4); + -+ // If the mdp is valid, it will point to a DataLayout header which is -+ // consistent with the bcp. The converse is highly probable also. 
-+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); -+ ld(t0, Address(x11, Method::const_offset())); -+ add(x12, x12, t0); -+ la(x12, Address(x12, ConstMethod::codes_offset())); -+ beq(x12, xbcp, verify_continue); -+ // x10: method -+ // xbcp: bcp // xbcp == 22 -+ // x13: mdp -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), -+ x11, xbcp, x13); -+ bind(verify_continue); -+ ld(x10, Address(sp, 0)); -+ ld(x11, Address(sp, wordSize)); -+ ld(x12, Address(sp, 2 * wordSize)); -+ ld(x13, Address(sp, 3 * wordSize)); -+ add(sp, sp, 4 * wordSize); -+#endif // ASSERT -+} ++ Label skip, guard; ++ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); + ++ __ lwu(t0, guard); + -+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, -+ int constant, -+ Register value) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Address data(mdp_in, constant); -+ sd(value, data); -+} ++ // Subsequent loads of oops must occur after load of guard value. ++ // BarrierSetNMethod::disarm sets guard with release semantics. ++ __ membar(MacroAssembler::LoadLoad); ++ __ lwu(t1, thread_disarmed_addr); ++ __ beq(t0, t1, skip); + ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); ++ __ jalr(ra, t0, offset); ++ __ j(skip); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ int constant, -+ bool decrement) { -+ increment_mdp_data_at(mdp_in, noreg, constant, decrement); -+} ++ __ bind(guard); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ Register reg, -+ int constant, -+ bool decrement) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ // %%% this does 64bit counters at best it is wasting space -+ // at worst it is a rare bug when counters overflow ++ assert(__ offset() % 4 == 0, "bad alignment"); ++ __ emit_int32(0); // nmethod guard value. Skipped over in common case. + -+ assert_different_registers(t1, t0, mdp_in, reg); ++ __ bind(skip); ++} + -+ Address addr1(mdp_in, constant); -+ Address addr2(t1, 0); -+ Address &addr = addr1; -+ if (reg != noreg) { -+ la(t1, addr1); -+ add(t1, t1, reg); -+ addr = addr2; ++void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs == NULL) { ++ return; + } + -+ if (decrement) { -+ ld(t0, addr); -+ addi(t0, t0, -DataLayout::counter_increment); -+ Label L; -+ bltz(t0, L); // skip store if counter underflow -+ sd(t0, addr); -+ bind(L); -+ } else { -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ ld(t0, addr); -+ addi(t0, t0, DataLayout::counter_increment); -+ Label L; -+ blez(t0, L); // skip store if counter overflow -+ sd(t0, addr); -+ bind(L); -+ } -+} ++ Label bad_call; ++ __ beqz(xmethod, bad_call); + -+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, -+ int flag_byte_constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ int flags_offset = in_bytes(DataLayout::flags_offset()); -+ // Set the flag -+ lbu(t1, Address(mdp_in, flags_offset)); -+ ori(t1, t1, flag_byte_constant); -+ sb(t1, Address(mdp_in, flags_offset)); -+} ++ // Pointer chase to the method holder to find out if the method is concurrently unloading. ++ Label method_live; ++ __ load_method_holder_cld(t0, xmethod); + ++ // Is it a strong CLD? 
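The nmethod_entry_barrier emitted above boils down to a guard-word handshake: the nmethod carries a 32-bit guard behind the barrier code (the emit_int32(0) above), each entering thread loads it, orders subsequent oop loads after that load, and compares it against its per-thread disarmed value; only a mismatch takes the runtime stub. The c2i_entry_barrier that begins here continues with the CLD liveness check in the next hunk. A hedged C++ model of the handshake, pairing the acquire-style load with the release store that NativeNMethodBarrier::set_value performs later in this patch (names are illustrative, not HotSpot API):

    #include <atomic>
    #include <cstdint>

    // `guard` models the word after the barrier code; `thread_disarmed` models
    // the value at bs_nm->thread_disarmed_offset() in the entering thread.
    static bool entry_barrier_must_run(const std::atomic<int32_t>* guard,
                                       int32_t thread_disarmed) {
      // lwu t0, guard + membar(LoadLoad): later oop loads may not float above this.
      int32_t value = guard->load(std::memory_order_acquire);
      // beq t0, t1, skip -- equal means the nmethod is disarmed for this thread.
      return value != thread_disarmed;
    }

    // BarrierSetNMethod::disarm stores the disarmed value with release
    // semantics, pairing with the acquire load above.
    static void disarm_sketch(std::atomic<int32_t>* guard, int32_t disarmed_value) {
      guard->store(disarmed_value, std::memory_order_release);
    }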
++ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); ++ __ bnez(t1, method_live); + -+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, -+ int offset, -+ Register value, -+ Register test_value_out, -+ Label& not_equal_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ if (test_value_out == noreg) { -+ ld(t1, Address(mdp_in, offset)); -+ bne(value, t1, not_equal_continue); -+ } else { -+ // Put the test value into a register, so caller can use it: -+ ld(test_value_out, Address(mdp_in, offset)); -+ bne(value, test_value_out, not_equal_continue); -+ } -+} ++ // Is it a weak but alive CLD? ++ __ push_reg(RegSet::of(x28, x29), sp); + ++ __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); + -+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, -+ int offset_of_disp) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(t1, Address(mdp_in, offset_of_disp)); -+ add(mdp_in, mdp_in, t1); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+} ++ // Uses x28 & x29, so we must pass new temporaries. ++ __ resolve_weak_handle(x28, x29); ++ __ mv(t0, x28); + -+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, -+ Register reg, -+ int offset_of_disp) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ add(t1, mdp_in, reg); -+ ld(t1, Address(t1, offset_of_disp)); -+ add(mdp_in, mdp_in, t1); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+} ++ __ pop_reg(RegSet::of(x28, x29), sp); + ++ __ bnez(t0, method_live); + -+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, -+ int constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, constant); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ __ bind(bad_call); ++ ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(method_live); +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..b85f7f5582b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
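The c2i barrier completed above decides whether the method's holder is concurrently unloading by testing its ClassLoaderData twice: a non-zero keep_alive count means a strong CLD, otherwise the weak holder handle is resolved and a non-null result means the loader is still reachable. A toy model of that decision, with made-up field names standing in for the offsets used above (not HotSpot code):

    #include <cstdint>

    // Illustrative stand-in for the ClassLoaderData fields the barrier reads.
    struct CldSketch {
      int32_t     keep_alive;    // ClassLoaderData::keep_alive_offset()
      const void* weak_holder;   // cleared once the loader becomes unreachable
    };

    static bool method_holder_is_live(const CldSketch* cld) {
      if (cld->keep_alive != 0) {          // lwu + bnez method_live (strong CLD)
        return true;
      }
      return cld->weak_holder != nullptr;  // resolve_weak_handle + bnez method_live
    }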
++ * ++ */ + ++#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + -+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" + -+ // save/restore across call_VM -+ addi(sp, sp, -2 * wordSize); -+ sd(zr, Address(sp, 0)); -+ sd(return_bci, Address(sp, wordSize)); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), -+ return_bci); -+ ld(zr, Address(sp, 0)); -+ ld(return_bci, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); -+} ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); + -+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, -+ Register bumped_count) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register end, Register tmp, RegSet saved_regs) {} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // If no method data exists, go to profile_continue. -+ // Otherwise, assign to mdp -+ test_method_data_pointer(mdp, profile_continue); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // We are taking a branch. Increment the taken count. -+ Address data(mdp, in_bytes(JumpData::taken_offset())); -+ ld(bumped_count, data); -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ addi(bumped_count, bumped_count, DataLayout::counter_increment); -+ Label L; -+ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; -+ blez(bumped_count, L); // skip store if counter overflow, -+ sd(bumped_count, data); -+ bind(L); -+ // The method data pointer needs to be updated to reflect the new target. 
-+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); -+ bind(profile_continue); -+ } -+} ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ virtual void barrier_stubs_init() {} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ virtual void nmethod_entry_barrier(MacroAssembler* masm); ++ virtual void c2i_entry_barrier(MacroAssembler* masm); ++ virtual ~BarrierSetAssembler() {} ++}; + -+ // We are taking a branch. Increment the not taken count. -+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +new file mode 100644 +index 00000000000..ae7ee4c5a44 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // The method data pointer needs to be updated to correspond to -+ // the next bytecode -+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); -+ bind(profile_continue); -+ } -+} ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nativeInst.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/registerMap.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/align.hpp" ++#include "utilities/debug.hpp" + -+void InterpreterMacroAssembler::profile_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++class NativeNMethodBarrier: public NativeInstruction { ++ address instruction_address() const { return addr_at(0); } + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ int *guard_addr() { ++ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ ++ return reinterpret_cast(instruction_address() + 12 * 4); ++ } + -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++public: ++ int get_value() { ++ return Atomic::load_acquire(guard_addr()); ++ } + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); -+ bind(profile_continue); ++ void set_value(int value) { ++ Atomic::release_store(guard_addr(), value); + } -+} + -+void InterpreterMacroAssembler::profile_final_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ void verify() const; ++}; + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++// Store the instruction bitmask, bits and name for checking the barrier. ++struct CheckInsn { ++ uint32_t mask; ++ uint32_t bits; ++ const char *name; ++}; + -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++static const struct CheckInsn barrierInsn[] = { ++ { 0x00000fff, 0x00000297, "auipc t0, 0 "}, ++ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, ++ { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, ++ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, ++ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, ++ { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, ++ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, ++ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, ++ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, ++ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, ++ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, ++ { 0x00000fff, 0x0000006f, "j skip "} ++ /* guard: */ ++ /* 32bit nmethod guard value */ ++ /* skip: */ ++}; + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++// The encodings must match the instructions emitted by ++// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific ++// register numbers and immediate values in the encoding. 
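The expected-encoding table above is consumed by the verify() loop that follows. As a worked example of one mask/bits pair: `auipc t0, 0` encodes as imm[31:12] | rd[11:7] | opcode[6:0] with opcode 0x17 and rd = t0 (x5), giving 0x00000297, and the 0x00000fff mask keeps only the rd and opcode fields so any immediate the assembler chose still matches. A standalone check of that first row (encodings assumed from the RISC-V base ISA, for illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // auipc: imm[31:12] | rd[11:7] | opcode[6:0], opcode = 0x17, rd = x5 (t0).
      const uint32_t opcode_auipc = 0x17;
      const uint32_t rd_t0        = 5;
      const uint32_t auipc_t0_0   = (0u << 12) | (rd_t0 << 7) | opcode_auipc;   // 0x00000297

      const uint32_t mask = 0x00000fff;   // keep rd + opcode, ignore the immediate
      const uint32_t bits = 0x00000297;   // expected pattern from the table above

      // Any `auipc t0, <imm>` matches, whatever immediate was patched in.
      const uint32_t with_other_imm = (0x12345u << 12) | (rd_t0 << 7) | opcode_auipc;
      assert((auipc_t0_0     & mask) == bits);
      assert((with_other_imm & mask) == bits);
      return 0;
    }

The same layout explains the nearby constants: the guard word sits 12 * 4 = 48 bytes past the barrier start (hence guard_addr()'s + 12 * 4 and the 48(t0) in the second row), and entry_barrier_offset = -4 * 13 backs up over those 12 instructions plus the 4-byte guard from the frame-complete point.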
++void NativeNMethodBarrier::verify() const { ++ intptr_t addr = (intptr_t) instruction_address(); ++ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { ++ uint32_t inst = *((uint32_t*) addr); ++ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { ++ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); ++ fatal("not an %s instruction.", barrierInsn[i].name); ++ } ++ addr += 4; + } +} + + -+void InterpreterMacroAssembler::profile_virtual_call(Register receiver, -+ Register mdp, -+ Register reg2, -+ bool receiver_can_be_null) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++/* We're called from an nmethod when we need to deoptimize it. We do ++ this by throwing away the nmethod's frame and jumping to the ++ ic_miss stub. This looks like there has been an IC miss at the ++ entry of the nmethod, so we resolve the call, which will fall back ++ to the interpreter if the nmethod has been unloaded. */ ++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ typedef struct { ++ intptr_t *sp; intptr_t *fp; address ra; address pc; ++ } frame_pointers_t; + -+ Label skip_receiver_profile; -+ if (receiver_can_be_null) { -+ Label not_null; -+ // We are making a call. Increment the count for null receiver. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ j(skip_receiver_profile); -+ bind(not_null); -+ } ++ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); + -+ // Record the receiver type. -+ record_klass_in_profile(receiver, mdp, reg2, true); -+ bind(skip_receiver_profile); ++ JavaThread *thread = JavaThread::current(); ++ RegisterMap reg_map(thread, false); ++ frame frame = thread->last_frame(); + -+ // The method data pointer needs to be updated to reflect the new target. ++ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); ++ assert(frame.cb() == nm, "must be"); ++ frame = frame.sender(®_map); + -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++ LogTarget(Trace, nmethod, barrier) out; ++ if (out.is_enabled()) { ++ ResourceMark mark; ++ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", ++ nm->method()->name_and_sig_as_C_string(), ++ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, ++ thread->name(), frame.sp(), nm->verified_entry_point()); + } -+} -+ -+// This routine creates a state machine for updating the multi-row -+// type profile at a virtual call site (or other type-sensitive bytecode). -+// The machine visits each row (of receiver/count) until the receiver type -+// is found, or until it runs out of rows. At the same time, it remembers -+// the location of the first empty row. (An empty row records null for its -+// receiver, and can be allocated for a newly-observed receiver type.) -+// Because there are two degrees of freedom in the state, a simple linear -+// search will not work; it must be a decision tree. Hence this helper -+// function is recursive, to generate the required tree structured code. -+// It's the interpreter, so we are trading off code space for speed. -+// See below for example code. 
-+void InterpreterMacroAssembler::record_klass_in_profile_helper( -+ Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call) { -+ if (TypeProfileWidth == 0) { -+ if (is_virtual_call) { -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ } -+ -+ } else { -+ int non_profiled_offset = -1; -+ if (is_virtual_call) { -+ non_profiled_offset = in_bytes(CounterData::count_offset()); -+ } + -+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, -+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); -+ } ++ new_frame->sp = frame.sp(); ++ new_frame->fp = frame.fp(); ++ new_frame->ra = frame.pc(); ++ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +} + -+void InterpreterMacroAssembler::record_item_in_profile_helper( -+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { -+ int last_row = total_rows - 1; -+ assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the item and for null. -+ // Take any of three different outcomes: -+ // 1. found item => increment count and goto done -+ // 2. found null => keep looking for case 1, maybe allocate this cell -+ // 3. found something else => keep looking for cases 1 and 2 -+ // Case 3 is handled by a recursive call. -+ for (int row = start_row; row <= last_row; row++) { -+ Label next_test; -+ bool test_for_null_also = (row == start_row); -+ -+ // See if the item is item[n]. -+ int item_offset = in_bytes(item_offset_fn(row)); -+ test_mdp_data_at(mdp, item_offset, item, -+ (test_for_null_also ? reg2 : noreg), -+ next_test); -+ // (Reg2 now contains the item from the CallData.) -+ -+ // The item is item[n]. Increment count[n]. -+ int count_offset = in_bytes(item_count_offset_fn(row)); -+ increment_mdp_data_at(mdp, count_offset); -+ j(done); -+ bind(next_test); -+ -+ if (test_for_null_also) { -+ Label found_null; -+ // Failed the equality check on item[n]... Test for null. -+ if (start_row == last_row) { -+ // The only thing left to do is handle the null case. -+ if (non_profiled_offset >= 0) { -+ beqz(reg2, found_null); -+ // Item did not match any saved item and there is no empty row for it. -+ // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, non_profiled_offset); -+ j(done); -+ bind(found_null); -+ } else { -+ bnez(reg2, done); -+ } -+ break; -+ } -+ // Since null is rare, make it be the branch-taken case. -+ beqz(reg2, found_null); ++// This is the offset of the entry barrier from where the frame is completed. ++// If any code changes between the end of the verified entry where the entry ++// barrier resides, and the completion of the frame, then ++// NativeNMethodCmpBarrier::verify() will immediately complain when it does ++// not find the expected native instruction at this offset, which needs updating. ++// Note that this offset is invariant of PreserveFramePointer. + -+ // Put all the "Case 3" tests here. -+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, -+ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++// see BarrierSetAssembler::nmethod_entry_barrier ++// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 ++static const int entry_barrier_offset = -4 * 13; + -+ // Found a null. 
Keep searching for a matching item, -+ // but remember that this is an empty (unused) slot. -+ bind(found_null); -+ } -+ } ++static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { ++ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; ++ NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); ++ debug_only(barrier->verify()); ++ return barrier; ++} + -+ // In the fall-through case, we found no matching item, but we -+ // observed the item[start_row] is NULL. -+ // Fill in the item field and increment the count. -+ int item_offset = in_bytes(item_offset_fn(start_row)); -+ set_mdp_data_at(mdp, item_offset, item); -+ int count_offset = in_bytes(item_count_offset_fn(start_row)); -+ mv(reg2, DataLayout::counter_increment); -+ set_mdp_data_at(mdp, count_offset, reg2); -+ if (start_row > 0) { -+ j(done); ++void BarrierSetNMethod::disarm(nmethod* nm) { ++ if (!supports_entry_barrier(nm)) { ++ return; + } -+} + -+// Example state machine code for three profile rows: -+// # main copy of decision tree, rooted at row[1] -+// if (row[0].rec == rec) then [ -+// row[0].incr() -+// goto done -+// ] -+// if (row[0].rec != NULL) then [ -+// # inner copy of decision tree, rooted at row[1] -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[1].rec != NULL) then [ -+// # degenerate decision tree, rooted at row[2] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// if (row[2].rec != NULL) then [ -+// count.incr() -+// goto done -+// ] # overflow -+// row[2].init(rec) -+// goto done -+// ] else [ -+// # remember row[1] is empty -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[1].init(rec) -+// goto done -+// ] -+// else [ -+// # remember row[0] is empty -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[0].init(rec) -+// goto done -+// ] -+// done: ++ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. ++ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); + -+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, -+ Register mdp, Register reg2, -+ bool is_virtual_call) { -+ assert(ProfileInterpreter, "must be profiling"); -+ Label done; ++ barrier->set_value(disarmed_value()); ++} + -+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); ++bool BarrierSetNMethod::is_armed(nmethod* nm) { ++ if (!supports_entry_barrier(nm)) { ++ return false; ++ } + -+ bind(done); ++ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++ return barrier->get_value() != disarmed_value(); +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..a419f92b5f6 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "interpreter/interp_masm.hpp" + -+ // Update the total ret count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++#define __ masm-> + -+ for (uint row = 0; row < RetData::row_limit(); row++) { -+ Label next_test; + -+ // See if return_bci is equal to bci[n]: -+ test_mdp_data_at(mdp, -+ in_bytes(RetData::bci_offset(row)), -+ return_bci, noreg, -+ next_test); ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + -+ // return_bci is equal to bci[n]. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ __ srli(obj, obj, CardTable::card_shift()); + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, -+ in_bytes(RetData::bci_displacement_offset(row))); -+ j(profile_continue); -+ bind(next_test); -+ } ++ assert(CardTable::dirty_card_val() == 0, "must be"); + -+ update_mdp_for_ret(return_bci); ++ __ load_byte_map_base(tmp); ++ __ add(tmp, obj, tmp); + -+ bind(profile_continue); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t1, Address(tmp)); ++ __ beqz(t1, L_already_dirty); ++ __ sb(zr, Address(tmp)); ++ __ bind(L_already_dirty); ++ } else { ++ __ sb(zr, Address(tmp)); + } +} + -+void InterpreterMacroAssembler::profile_null_seen(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ assert_different_registers(start, tmp); ++ assert_different_registers(count, tmp); + -+ // The method data pointer needs to be updated. 
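store_check above dirties the single card covering an updated oop field; the array post-barrier whose prologue appears here walks a whole card range in the next hunk. In C terms the per-store case is just a byte store into a biased card table. A sketch with hypothetical globals standing in for CardTable::card_shift() and the byte-map base loaded by load_byte_map_base() (values assumed, not HotSpot code):

    #include <cstdint>

    static const int      kCardShift      = 9;        // 512-byte cards, a typical default
    static uint8_t*       g_byte_map_base = nullptr;  // biased base set up at VM init
    static const uint8_t  kDirtyCard      = 0;        // CardTable::dirty_card_val()

    // Equivalent of store_check(obj): mark the card covering `field_addr` dirty.
    static void store_check_sketch(uintptr_t field_addr, bool use_cond_card_mark) {
      uint8_t* card = g_byte_map_base + (field_addr >> kCardShift);  // srli + add
      if (use_cond_card_mark) {
        // UseCondCardMark: after the StoreLoad fence above, write only if the
        // card is not already dirty, to avoid false sharing on hot cards.
        if (*card != kDirtyCard) {
          *card = kDirtyCard;                                        // sb zr, (card)
        }
      } else {
        *card = kDirtyCard;                                          // sb zr, (card)
      }
    }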
-+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); ++ Label L_loop, L_done; ++ const Register end = count; + -+ bind(profile_continue); -+ } -+} ++ __ beqz(count, L_done); // zero count - nothing to do ++ // end = start + count << LogBytesPerHeapOop ++ __ shadd(end, count, start, count, LogBytesPerHeapOop); ++ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + -+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { -+ if (ProfileInterpreter && TypeProfileCasts) { -+ Label profile_continue; ++ __ srli(start, start, CardTable::card_shift()); ++ __ srli(end, end, CardTable::card_shift()); ++ __ sub(count, end, start); // number of bytes to copy + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ __ load_byte_map_base(tmp); ++ __ add(start, start, tmp); + -+ int count_offset = in_bytes(CounterData::count_offset()); -+ // Back up the address, since we have already bumped the mdp. -+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ __ bind(L_loop); ++ __ add(tmp, start, count); ++ __ sb(zr, Address(tmp)); ++ __ sub(count, count, 1); ++ __ bgez(count, L_loop); ++ __ bind(L_done); ++} + -+ // *Decrement* the counter. We expect to see zero or small negatives. -+ increment_mdp_data_at(mdp, count_offset, true); ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; + -+ bind (profile_continue); ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || dst.offset() == 0) { ++ store_check(masm, dst.base(), x13); ++ } else { ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); ++ } + } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..686fe8fa478 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + -+ // Record the object type. -+ record_klass_in_profile(klass, mdp, reg2, false); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+ bind(profile_continue); -+ } -+} ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); + -+void InterpreterMacroAssembler::profile_switch_default(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..7aa2015f9ec +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Update the default case count -+ increment_mdp_data_at(mdp, -+ in_bytes(MultiBranchData::default_count_offset())); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+ // The method data pointer needs to be updated. -+ update_mdp_by_offset(mdp, -+ in_bytes(MultiBranchData:: -+ default_displacement_offset())); ++#define __ masm-> + -+ bind(profile_continue); ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } +} + -+void InterpreterMacroAssembler::profile_switch_case(Register index, -+ Register mdp, -+ Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, ++ RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ } ++} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..00419c3163c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // Build the base (index * per_case_size_in_bytes()) + -+ // case_array_offset_in_bytes() -+ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); -+ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); -+ Assembler::mul(index, index, reg2); -+ Assembler::add(index, index, t0); ++#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + -+ // Update the case count -+ increment_mdp_data_at(mdp, -+ index, -+ in_bytes(MultiBranchData::relative_count_offset())); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" + -+ // The method data pointer need to be updated. -+ update_mdp_by_offset(mdp, -+ index, -+ in_bytes(MultiBranchData:: -+ relative_displacement_offset())); ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. + -+ bind(profile_continue); -+ } -+} ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) {} + -+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + -+void InterpreterMacroAssembler::notify_method_entry() { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_method_entry)); -+ bind(L); -+ } ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ } ++#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +new file mode 100644 +index 00000000000..cd568cc723f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
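The ModRefBarrierSetAssembler declaration above describes a type filter: only oop stores are routed to the protected oop_store_at, which concrete assemblers such as the card-table one earlier in this patch override, while primitive stores fall through with no barrier. A toy model of that dispatch, using invented names rather than the real HotSpot types:

    #include <cstdio>

    enum BasicTypeSketch { T_INT_SK, T_OBJECT_SK, T_ARRAY_SK };

    static bool is_reference_type_sketch(BasicTypeSketch t) {
      return t == T_OBJECT_SK || t == T_ARRAY_SK;
    }

    // store_at() filters on the value type; oop stores go through the hook a
    // concrete barrier set overrides, mirroring ModRefBarrierSetAssembler::store_at.
    struct ModRefSketch {
      virtual ~ModRefSketch() {}
      void store_at(BasicTypeSketch type) {
        if (is_reference_type_sketch(type)) {
          oop_store_at(type);                    // concrete GC adds pre/post barriers
        } else {
          std::puts("plain sw/sd, no barrier");
        }
      }
    protected:
      virtual void oop_store_at(BasicTypeSketch type) = 0;
    };

    struct CardTableSketch : ModRefSketch {
    protected:
      void oop_store_at(BasicTypeSketch) override {
        std::puts("store oop, then dirty the covering card");
      }
    };

    int main() {
      CardTableSketch bs;
      bs.store_at(T_OBJECT_SK);   // routed through the post-barrier
      bs.store_at(T_INT_SK);      // no barrier
      return 0;
    }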
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ get_method(c_rarg1); -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ } -+} ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + ++#define __ masm->masm()-> + -+void InterpreterMacroAssembler::notify_method_exit( -+ TosState state, NotifyMethodExitMode mode) { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ // Note: frame::interpreter_frame_result has a dependency on how the -+ // method result is saved across the call to post_method_exit. If this -+ // is changed then the interpreter_frame_result implementation will -+ // need to be updated too. ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ Register result = result_opr()->as_register(); + -+ // template interpreter will leave the result on the top of the stack. -+ push(state); -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); -+ bind(L); -+ pop(state); -+ } ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ push(state); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ pop(state); ++ if (UseCompressedOops) { ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; + } -+} -+ + -+// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
-+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where) { -+ Label done; -+ if (!preloaded) { -+ lwu(tmp1, counter_addr); -+ } -+ add(tmp1, tmp1, increment); -+ sw(tmp1, counter_addr); -+ lwu(tmp2, mask); -+ andr(tmp1, tmp1, tmp2); -+ bnez(tmp1, done); -+ j(*where); // offset is too large so we have to use j instead of beqz here -+ bind(done); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, ++ /* release */ Assembler::rl, /* is_cae */ false, result); +} + -+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments) { -+ // interpreter specific -+ // -+ // Note: No need to save/restore rbcp & rlocals pointer since these -+ // are callee saved registers and no blocking/ GC can happen -+ // in leaf calls. -+#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_leaf_base:" -+ " last_sp != NULL"); -+ bind(L); -+ } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); -+} ++#undef __ + -+void InterpreterMacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // interpreter specific -+ // -+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't -+ // really make a difference for these runtime calls, since they are -+ // slow anyway. Btw., bcp must be saved/restored since it may change -+ // due to GC. -+ save_bcp(); +#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_base:" -+ " last_sp != NULL"); -+ bind(L); -+ } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, -+ entry_point, number_of_arguments, -+ check_exceptions); -+// interpreter specific -+ restore_bcp(); -+ restore_locals(); -+} -+ -+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { -+ assert_different_registers(obj, tmp, t0, mdo_addr.base()); -+ Label update, next, none; ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif + -+ verify_oop(obj); ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ BasicType bt = access.type(); ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); + -+ bnez(obj, update); -+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); -+ j(next); ++ LIR_Opr tmp1 = gen->new_register(T_OBJECT); ++ LIR_Opr tmp2 = gen->new_register(T_OBJECT); ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); + -+ bind(update); -+ load_klass(obj, obj); ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); ++ return result; ++ } ++ } ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, 
cmp_value, new_value); ++} + -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); // klass seen before, nothing to -+ // do. The unknown bit may have been -+ // set already but no need to check. ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); + -+ andi(t0, obj, TypeEntries::type_unknown); -+ bnez(t0, next); -+ // already unknown. Nothing to do anymore. ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); + -+ ld(t0, mdo_addr); -+ beqz(t0, none); -+ mv(tmp, (u1)TypeEntries::null_seen); -+ beq(t0, tmp, none); -+ // There is a chance that the checks above (re-reading profiling -+ // data from memory) fail if another thread has just set the -+ // profiling to this obj's klass -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); ++ } + -+ // different than before. Cannot keep accurate profile. -+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); -+ j(next); ++ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); + -+ bind(none); -+ // first time here. Set profile type. -+ sd(obj, mdo_addr); ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ LIR_Opr tmp_opr = gen->new_register(type); ++ __ move(result, tmp_opr); ++ result = tmp_opr; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ result /* pre_val */); ++ } ++ } + -+ bind(next); ++ return result; +} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..d0ac6e52436 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,712 @@ ++/* ++ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { -+ if (!ProfileInterpreter) { -+ return; -+ } ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif + -+ if (MethodData::profile_arguments() || MethodData::profile_return()) { -+ Label profile_continue; ++#define __ masm-> + -+ test_method_data_pointer(mdp, profile_continue); ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + -+ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ Label done; + -+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); -+ if (is_virtual) { -+ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); -+ bne(t0, tmp, profile_continue); -+ } else { -+ mv(tmp, (u1)DataLayout::call_type_data_tag); -+ bne(t0, tmp, profile_continue); -+ } ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); + -+ // calculate slot step -+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); -+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; -+ -+ // calculate type step -+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); -+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; -+ -+ if (MethodData::profile_arguments()) { -+ Label done, loop, loopEnd, profileArgument, profileReturnType; -+ RegSet pushed_registers; -+ pushed_registers += x15; -+ pushed_registers += x16; -+ pushed_registers += x17; -+ Register mdo_addr = x15; -+ Register index = x16; -+ Register off_to_args = x17; -+ push_reg(pushed_registers, sp); -+ -+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); -+ mv(t0, TypeProfileArgsLimit); -+ beqz(t0, loopEnd); -+ -+ mv(index, zr); // index < TypeProfileArgsLimit -+ bind(loop); -+ bgtz(index, profileReturnType); -+ mv(t0, (int)MethodData::profile_return()); -+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false -+ bind(profileReturnType); -+ // If return value type is profiled we may have no argument to profile -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ mv(t1, - TypeStackSlotEntries::per_arg_count()); -+ mul(t1, index, t1); -+ add(tmp, tmp, t1); -+ mv(t1, TypeStackSlotEntries::per_arg_count()); -+ add(t0, mdp, off_to_args); -+ blt(tmp, t1, done); -+ -+ 
bind(profileArgument); -+ -+ ld(tmp, Address(callee, Method::const_offset())); -+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); -+ // stack offset o (zero based) from the start of the argument -+ // list, for n arguments translates into offset n - o - 1 from -+ // the end of the argument list -+ mv(t0, stack_slot_offset0); -+ mv(t1, slot_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(t0, mdp, t0); -+ ld(t0, Address(t0)); -+ sub(tmp, tmp, t0); -+ addi(tmp, tmp, -1); -+ Address arg_addr = argument_address(tmp); -+ ld(tmp, arg_addr); -+ -+ mv(t0, argument_type_offset0); -+ mv(t1, type_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(mdo_addr, mdp, t0); -+ Address mdo_arg_addr(mdo_addr, 0); -+ profile_obj_type(tmp, mdo_arg_addr, t1); -+ -+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); -+ addi(off_to_args, off_to_args, to_add); -+ -+ // increment index by 1 -+ addi(index, index, 1); -+ mv(t1, TypeProfileArgsLimit); -+ blt(index, t1, loop); -+ bind(loopEnd); ++ // Is GC active? ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ assert_different_registers(src, dst, count, t0); + -+ if (MethodData::profile_return()) { -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ __ lbu(t0, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, done); ++ } else { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(t0, done); + } + -+ add(t0, mdp, off_to_args); -+ bind(done); -+ mv(mdp, t0); -+ -+ // unspill the clobbered registers -+ pop_reg(pushed_registers, sp); -+ -+ if (MethodData::profile_return()) { -+ // We're right after the type profile for the last -+ // argument. tmp is the number of cells left in the -+ // CallTypeData/VirtualCallTypeData to reach its end. Non null -+ // if there's a return to profile. 
-+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); -+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ __ push_reg(saved_regs, sp); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), ++ src, dst, count); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + } -+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ } else { -+ assert(MethodData::profile_return(), "either profile call args or call ret"); -+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ __ pop_reg(saved_regs, sp); ++ __ bind(done); + } ++ } ++} + -+ // mdp points right after the end of the -+ // CallTypeData/VirtualCallTypeData, right after the cells for the -+ // return value type if there's one -+ -+ bind(profile_continue); ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); + } +} + -+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { -+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); -+ if (ProfileInterpreter && MethodData::profile_return()) { -+ Label profile_continue, done; ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ assert(thread == xthread, "must be"); + -+ test_method_data_pointer(mdp, profile_continue); ++ Label done; ++ Label runtime; + -+ if (MethodData::profile_return_jsr292_only()) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+ // If we don't profile all invoke bytecodes we must make sure -+ // it's a bytecode we indeed profile. We can't go back to the -+ // begining of the ProfileData we intend to update to check its -+ // type because we're right after it and we don't known its -+ // length -+ Label do_profile; -+ lbu(t0, Address(xbcp, 0)); -+ mv(tmp, (u1)Bytecodes::_invokedynamic); -+ beq(t0, tmp, do_profile); -+ mv(tmp, (u1)Bytecodes::_invokehandle); -+ beq(t0, tmp, do_profile); -+ get_method(tmp); -+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ mv(t1, vmIntrinsics::_compiledLambdaForm); -+ bne(t0, t1, profile_continue); -+ bind(do_profile); -+ } ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); -+ mv(tmp, ret); -+ profile_obj_type(tmp, mdo_ret_addr, t1); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
++ __ lwu(tmp, in_progress);
++ } else {
++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
++ __ lbu(tmp, in_progress);
++ }
++ __ beqz(tmp, done);
+
++ // Do we need to load the previous value?
++ if (obj != noreg) {
++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+
++ // Is the previous value null?
++ __ beqz(pre_val, done);
+
++ // Can we store original value in the thread's buffer?
++ // Is index == 0?
++ // (The index field is typed as size_t.)
++ __ ld(tmp, index); // tmp := *index_adr
++ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime
+
++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
++ __ sd(tmp, index); // *index_adr := tmp
++ __ ld(t0, buffer);
++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
+
++ // Record the previous value
++ __ sd(pre_val, Address(tmp, 0));
++ __ j(done);
+
++ __ bind(runtime);
++ // save the live input values
++ RegSet saved = RegSet::of(pre_val);
++ if (tosca_live) saved += RegSet::of(x10);
++ if (obj != noreg) saved += RegSet::of(obj);
+
++ __ push_reg(saved, sp);
+
++ // Calling the runtime using the regular call_VM_leaf mechanism generates
++ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
++ //
++ // If we are generating the pre-barrier without a frame (e.g. in the
++ // intrinsified Reference.get() routine) then ebp might be pointing to
++ // the caller frame and so this check will most likely fail at runtime.
++ //
++ // Expanding the call directly bypasses the generation of the check.
++ // So when we do not have a full interpreter frame on the stack
++ // expand_call should be passed true. 
++ if (expand_call) {
++ assert(pre_val != c_rarg1, "smashed arg");
++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
++ } else {
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
++ }
+
++ __ pop_reg(saved, sp);
+
++ __ bind(done);
++}
+
++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
+
++ Label is_null;
++ __ beqz(dst, is_null);
++ resolve_forward_pointer_not_null(masm, dst, tmp);
++ __ bind(is_null);
++}
+
++// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly
++// passed in.
++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
++ // The below loads the mark word, checks if the lowest two bits are
++ // set, and if so, clear the lowest two bits and copy the result
++ // to dst. Otherwise it leaves dst alone.
++ // Implementing this is surprisingly awkward. I do it here by:
++ // - Inverting the mark word
++ // - Test lowest two bits == 0
++ // - If so, set the lowest two bits
++ // - Invert the result back, and copy to dst
++ RegSet saved_regs = RegSet::of(t2);
++ bool borrow_reg = (tmp == noreg);
++ if (borrow_reg) {
++ // No free registers available. Make one useful. 
++ tmp = t0; ++ if (tmp == dst) { ++ tmp = t1; ++ } ++ saved_regs += RegSet::of(tmp); + } -+} + -+void InterpreterMacroAssembler::get_method_counters(Register method, -+ Register mcs, Label& skip) { -+ Label has_counters; -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ bnez(mcs, has_counters); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), method); -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory -+ bind(has_counters); -+} ++ assert_different_registers(tmp, dst, t2); ++ __ push_reg(saved_regs, sp); + -+#ifdef ASSERT -+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit) { -+ Label L; -+ andi(t0, access_flags, flag_bits); -+ if (stop_by_hit) { -+ beqz(t0, L); -+ } else { -+ bnez(t0, L); -+ } -+ stop(msg); -+ bind(L); -+} ++ Label done; ++ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ andi(t2, tmp, markWord::lock_mask_in_place); ++ __ bnez(t2, done); ++ __ ori(tmp, tmp, markWord::marked_value); ++ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ bind(done); + -+void InterpreterMacroAssembler::verify_frame_setup() { -+ Label L; -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ ld(t0, monitor_block_top); -+ beq(esp, t0, L); -+ stop("broken stack frame setup in interpreter"); -+ bind(L); ++ __ pop_reg(saved_regs, sp); +} -+#endif -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -new file mode 100644 -index 000000000..042ee8280 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -0,0 +1,283 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP -+#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "interpreter/invocationCounter.hpp" -+#include "runtime/frame.hpp" ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, ++ Register dst, ++ Address load_addr, ++ DecoratorSet decorators) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert(dst != t1 && load_addr.base() != t1, "need t1"); ++ assert_different_registers(load_addr.base(), t0, t1); + -+// This file specializes the assember with interpreter-specific macros ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ bool is_narrow = UseCompressedOops && !is_native; + -+typedef ByteSize (*OffsetFunction)(uint); ++ Label heap_stable, not_cset; ++ __ enter(); ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lbu(t1, gc_state); + -+class InterpreterMacroAssembler: public MacroAssembler { -+ protected: -+ // Interpreter specific version of call_VM_base -+ using MacroAssembler::call_VM_leaf_base; ++ // Check for heap stability ++ if (is_strong) { ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, heap_stable); ++ } else { ++ Label lrb; ++ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); ++ __ bnez(t0, lrb); ++ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, heap_stable); ++ __ bind(lrb); ++ } + -+ virtual void call_VM_leaf_base(address entry_point, -+ int number_of_arguments); ++ // use x11 for load address ++ Register result_dst = dst; ++ if (dst == x11) { ++ __ mv(t1, dst); ++ dst = t1; ++ } + -+ virtual void call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions); ++ // Save x10 and x11, unless it is an output register ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); ++ __ la(x11, load_addr); ++ __ mv(x10, dst); + -+ // base routine for all dispatches -+ void dispatch_base(TosState state, address* table, bool verifyoop = true, -+ bool generate_poll = false, Register Rs = t0); ++ // Test for in-cset ++ if (is_strong) { ++ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1)); ++ __ andi(t0, t1, 1); ++ __ beqz(t0, not_cset); ++ } + -+ public: -+ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} -+ virtual ~InterpreterMacroAssembler() {} ++ __ push_call_clobbered_registers(); ++ if (is_strong) { ++ if (is_narrow) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } ++ } else if (is_weak) { ++ if (is_narrow) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(!is_narrow, "phantom access cannot be narrow"); ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ __ jalr(ra); ++ __ 
mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ __ bind(not_cset); ++ __ mv(result_dst, x10); ++ __ pop_reg(saved_regs, sp); + -+ void load_earlyret_value(TosState state); ++ __ bind(heap_stable); ++ __ leave(); ++} + -+ void jump_to_entry(address entry); ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); + -+ virtual void check_and_handle_popframe(Register java_thread); -+ virtual void check_and_handle_earlyret(Register java_thread); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + -+ // Interpreter-specific registers -+ void save_bcp() { -+ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ __ pop_call_clobbered_registers(); + } ++} + -+ void restore_bcp() { -+ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// x30 (tmp reg) ++// ++// Alias: ++// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; + } + -+ void restore_locals() { -+ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); -+ } ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; + -+ void restore_constant_pool_cache() { -+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ } ++ // Preserve src location for LRB ++ RegSet saved_regs; ++ if (dst == src.base()) { ++ dst = (src.base() == x28) ? 
x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); ++ } ++ assert_different_registers(dst, src.base()); + -+ void get_dispatch(); ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + -+ // Helpers for runtime call arguments/results -+ void get_method(Register reg) { -+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ } ++ load_reference_barrier(masm, dst, src, decorators); + -+ void get_const(Register reg) { -+ get_method(reg); -+ ld(reg, Address(reg, in_bytes(Method::const_offset()))); -+ } ++ if (dst != result_dst) { ++ __ mv(result_dst, dst); ++ dst = result_dst; ++ } + -+ void get_constant_pool(Register reg) { -+ get_const(reg); -+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); ++ } ++ } else { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + } + -+ void get_constant_pool_cache(Register reg) { -+ get_constant_pool(reg); -+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); + } ++} + -+ void get_cpool_and_tags(Register cpool, Register tags) { -+ get_constant_pool(cpool); -+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; + } + -+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); -+ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_method_counters(Register method, Register mcs, Label& skip); ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } + -+ // Load cpool->resolved_references(index). -+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); ++ shenandoah_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); + -+ // Load cpool->resolved_klass_at(index). -+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ // G1 barrier needs uncompressed oop for region cross check. 
++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ } ++} + -+ void pop_ptr(Register r = x10); -+ void pop_i(Register r = x10); -+ void pop_l(Register r = x10); -+ void pop_f(FloatRegister r = f10); -+ void pop_d(FloatRegister r = f10); -+ void push_ptr(Register r = x10); -+ void push_i(Register r = x10); -+ void push_l(Register r = x10); -+ void push_f(FloatRegister r = f10); -+ void push_d(FloatRegister r = f10); ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); + -+ void pop(TosState state); // transition vtos -> state -+ void push(TosState state); // transition state -> vtos ++ // Check for null. ++ __ beqz(obj, done); + -+ void empty_expression_stack() { -+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ // NULL last_sp until next java call -+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ assert(obj != t1, "need t1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lbu(t1, gc_state); ++ ++ // Check for heap in evacuation phase ++ __ andi(t0, t1, ShenandoahHeap::EVACUATION); ++ __ bnez(t0, slowpath); ++ ++ __ bind(done); ++} ++ ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. ++// ++// Clobbers t0, t1 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register addr, ++ Register expected, ++ Register new_val, ++ Assembler::Aqrl acquire, ++ Assembler::Aqrl release, ++ bool is_cae, ++ Register result) { ++ bool is_narrow = UseCompressedOops; ++ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; ++ ++ assert_different_registers(addr, expected, t0, t1); ++ assert_different_registers(addr, new_val, t0, t1); ++ ++ Label retry, success, fail, done; ++ ++ __ bind(retry); ++ ++ // Step1: Try to CAS. 
++ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ ++ // If success, then we are done. ++ __ beq(expected, t1, success); ++ ++ // Step2: CAS failed, check the forwared pointer. ++ __ mv(t0, t1); ++ ++ if (is_narrow) { ++ __ decode_heap_oop(t0, t0); + } ++ resolve_forward_pointer(masm, t0); + -+ // Helpers for swap and dup -+ void load_ptr(int n, Register val); -+ void store_ptr(int n, Register val); ++ __ encode_heap_oop(t0, t0); + -+// Load float value from 'address'. The value is loaded onto the FPU register v0. -+ void load_float(Address src); -+ void load_double(Address src); ++ // Report failure when the forwarded oop was not expected. ++ __ bne(t0, expected, fail); + -+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is -+ // a subtype of super_klass. -+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ // Step 3: CAS again using the forwarded oop. ++ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); + -+ // Dispatching -+ void dispatch_prolog(TosState state, int step = 0); -+ void dispatch_epilog(TosState state, int step = 0); -+ // dispatch via t0 -+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); -+ // dispatch normal table via t0 (assume t0 is loaded already) -+ void dispatch_only_normal(TosState state, Register Rs = t0); -+ void dispatch_only_noverify(TosState state, Register Rs = t0); -+ // load t0 from [xbcp + step] and dispatch via t0 -+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); -+ // load t0 from [xbcp] and dispatch via t0 and table -+ void dispatch_via (TosState state, address* table); ++ // Retry when failed. ++ __ bne(t0, t1, retry); + -+ // jump to an invoked target -+ void prepare_to_jump_from_interpreted(); -+ void jump_from_interpreted(Register method); ++ __ bind(success); ++ if (is_cae) { ++ __ mv(result, expected); ++ } else { ++ __ addi(result, zr, 1); ++ } ++ __ j(done); + ++ __ bind(fail); ++ if (is_cae) { ++ __ mv(result, t0); ++ } else { ++ __ mv(result, zr); ++ } + -+ // Returning from interpreted functions -+ // -+ // Removes the current activation (incl. unlocking of monitors) -+ // and sets up the return address. This code is also used for -+ // exception unwindwing. In that case, we do not want to throw -+ // IllegalMonitorStateExceptions, since that might get us into an -+ // infinite rethrow exception loop. -+ // Additionally this code is used for popFrame and earlyReturn. -+ // In popFrame case we want to skip throwing an exception, -+ // installing an exception, and notifying jvmdi. -+ // In earlyReturn case we only want to skip throwing an exception -+ // and installing an exception. -+ void remove_activation(TosState state, -+ bool throw_monitor_exception = true, -+ bool install_monitor_exception = true, -+ bool notify_jvmdi = true); ++ __ bind(done); ++} + -+ // FIXME: Give us a valid frame at a null check. -+ virtual void null_check(Register reg, int offset = -1) { -+ MacroAssembler::null_check(reg, offset); ++#undef __ ++ ++#ifdef COMPILER1 ++ ++#define __ ce->masm()-> ++ ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); + } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); ++} + -+ // Object locking -+ void lock_object (Register lock_reg); -+ void unlock_object(Register lock_reg); ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ++ ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); + -+ // Interpreter profiling operations -+ void set_method_data_pointer_for_bcp(); -+ void test_method_data_pointer(Register mdp, Label& zero_continue); -+ void verify_method_data_pointer(); ++ DecoratorSet decorators = stub->decorators(); ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + -+ void set_mdp_data_at(Register mdp_in, int constant, Register value); -+ void increment_mdp_data_at(Address data, bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, int constant, -+ bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, -+ bool decrement = false); -+ void increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where); ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); + -+ void set_mdp_flag_at(Register mdp_in, int flag_constant); -+ void test_mdp_data_at(Register mdp_in, int offset, Register value, -+ Register test_value_out, -+ Label& not_equal_continue); ++ assert(res == x10, "result must arrive in x10"); ++ assert_different_registers(tmp1, tmp2, t0); + -+ void record_klass_in_profile(Register receiver, Register mdp, -+ Register reg2, bool is_virtual_call); -+ void record_klass_in_profile_helper(Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call); -+ void record_item_in_profile_helper(Register item, Register mdp, -+ Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, -+ int non_profiled_offset); ++ if (res != obj) { ++ __ mv(res, obj); ++ } + -+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); -+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); -+ void update_mdp_by_constant(Register mdp_in, int constant); -+ void update_mdp_for_ret(Register return_bci); ++ if (is_strong) { ++ // Check for object in cset. 
++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(tmp2, tmp2, tmp1); ++ __ lbu(tmp2, Address(tmp2)); ++ __ beqz(tmp2, *stub->continuation(), true /* is_far */); ++ } + -+ // narrow int return value -+ void narrow(Register result); ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); + -+ void profile_taken_branch(Register mdp, Register bumped_count); -+ void profile_not_taken_branch(Register mdp); -+ void profile_call(Register mdp); -+ void profile_final_call(Register mdp); -+ void profile_virtual_call(Register receiver, Register mdp, -+ Register t1, -+ bool receiver_can_be_null = false); -+ void profile_ret(Register return_bci, Register mdp); -+ void profile_null_seen(Register mdp); -+ void profile_typecheck(Register mdp, Register klass, Register temp); -+ void profile_typecheck_failed(Register mdp); -+ void profile_switch_default(Register mdp); -+ void profile_switch_case(Register index_in_scratch, Register mdp, -+ Register temp); ++ if (is_strong) { ++ if (is_native) { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); ++ } else { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); ++ } ++ } else if (is_weak) { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); ++ } + -+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); -+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); -+ void profile_return_type(Register mdp, Register ret, Register tmp); -+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ __ j(*stub->continuation()); ++} + -+ // Debugging -+ // only if +VerifyFPU && (state == ftos || state == dtos) -+ void verify_FPU(int stack_depth, TosState state = ftos); ++#undef __ + -+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++#define __ sasm-> + -+ // support for jvmti/dtrace -+ void notify_method_entry(); -+ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); + -+ virtual void _call_Unimplemented(address call_site) { -+ save_bcp(); -+ set_last_Java_frame(esp, fp, (address) pc(), t0); -+ MacroAssembler::_call_Unimplemented(call_site); ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lb(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ // arg0 : object to be resolved ++ ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, x10); ++ __ load_parameter(1, x11); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ if (is_strong) { ++ if (is_native) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } else { ++ if (UseCompressedOops) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } ++ } ++ } else if (is_weak) { ++ assert(!is_native, "weak must not be called off-heap"); ++ if (UseCompressedOops) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(is_native, "phantom must only be called off-heap"); ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); + } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); + -+#ifdef ASSERT -+ void verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit = true); -+ void verify_frame_setup(); -+#endif -+}; ++ __ epilogue(); ++} + -+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..777f326e3 +index 00000000000..a705f497667 --- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,296 @@ ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,88 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -17606,455 +17721,369 @@ index 000000000..777f326e3 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "memory/universe.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/signature.hpp" -+ -+#define __ _masm-> -+ -+// Implementation of SignatureHandlerGenerator -+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } -+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } -+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } -+ -+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ return g_INTArgReg[++_num_reg_int_args]; -+ } -+ return noreg; -+} ++#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP + -+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ return g_FPArgReg[_num_reg_fp_args++]; -+ } -+ return fnoreg; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#ifdef COMPILER1 ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; ++#endif ++class StubCodeGenerator; + -+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { -+ int ret = _stack_offset; -+ _stack_offset += wordSize; -+ return ret; -+} ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: + -+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( -+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); // allocate on resourse area by default -+ _num_reg_int_args = (method->is_static() ? 
1 : 0); -+ _num_reg_fp_args = 0; -+ _stack_offset = 0; -+} ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ lw(reg, src); -+ } else { -+ __ lw(x10, src); -+ __ sw(x10, Address(to(), next_stack_offset())); -+ } -+} ++public: + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ ld(reg, src); -+ } else { -+ __ ld(x10, src); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } -+} ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++#endif + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ flw(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_int(); -+ } -+} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ fld(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_long(); -+ } -+} ++ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++}; + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { -+ Register reg = next_gpr(); -+ if (reg == c_rarg1) { -+ assert(offset() == 0, 
"argument register 1 can only be (non-null) receiver"); -+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); -+ } else if (reg != noreg) { -+ // c_rarg2-c_rarg7 -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... -+ __ ld(temp(), x10); -+ Label L; -+ __ beqz(temp(), L); -+ __ mv(reg, x10); -+ __ bind(L); -+ } else { -+ //to stack -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ ld(temp(), x10); -+ Label L; -+ __ bnez(temp(), L); -+ __ mv(x10, zr); -+ __ bind(L); -+ assert(sizeof(jobject) == wordSize, ""); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } -+} ++#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +new file mode 100644 +index 00000000000..6c855f23c2a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -0,0 +1,285 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { -+ // generate code to handle arguments -+ iterate(fingerprint); ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} + -+ // return result handler -+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); -+ __ ret(); ++instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ __ flush(); -+} ++ effect(TEMP tmp, KILL cr); + ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" ++ %} + -+// Implementation of SignatureHandlerLibrary ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ins_pipe(pipe_slow); ++%} + ++instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+class SlowSignatureHandler -+ : public NativeSignatureIterator { -+ private: -+ address _from; -+ intptr_t* _to; -+ intptr_t* _int_args; -+ intptr_t* _fp_args; -+ intptr_t* _fp_identifiers; -+ unsigned int _num_reg_int_args; -+ unsigned int _num_reg_fp_args; ++ effect(TEMP tmp, KILL cr); + ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" ++ %} + -+ intptr_t* single_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); -+ _from -= Interpreter::stackElementSize; -+ return from_addr; -+ } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ intptr_t* double_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); -+ _from -= 2 * Interpreter::stackElementSize; -+ return from_addr; -+ } ++ ins_pipe(pipe_slow); ++%} + -+ int pass_gpr(intptr_t value) { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ *_int_args++ = value; -+ return _num_reg_int_args++; -+ } -+ return -1; -+ } ++instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ int pass_fpr(intptr_t value) { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ *_fp_args++ = value; -+ return _num_reg_fp_args++; -+ } -+ return -1; -+ } ++ effect(TEMP tmp, KILL cr); + -+ void pass_stack(intptr_t value) { -+ *_to++ = value; -+ } ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" ++ %} + ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ virtual void pass_int() { -+ jint value = *(jint*)single_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ ins_pipe(pipe_slow); ++%} + ++instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ virtual void pass_long() { -+ intptr_t value = *double_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ effect(TEMP tmp, KILL cr); + -+ virtual void pass_object() { -+ intptr_t* addr = single_slot_addr(); -+ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" ++ %} + -+ virtual void pass_float() { -+ jint value = *(jint*) single_slot_addr(); -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ virtual void pass_double() { -+ intptr_t value = *double_slot_addr(); -+ int arg = pass_fpr(value); -+ if (0 <= arg) { -+ *_fp_identifiers |= (1ull << arg); // mark as double -+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack -+ pass_stack(value); -+ } -+ } ++ ins_pipe(pipe_slow); ++%} + -+ public: -+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) -+ : NativeSignatureIterator(method) -+ { -+ _from = from; -+ _to = to; ++instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); + -+ _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; -+ _fp_identifiers = to - 9; -+ *(int*) _fp_identifiers = 0; -+ _num_reg_int_args = (method->is_static() ? 1 : 0); -+ _num_reg_fp_args = 0; -+ } -+ ~SlowSignatureHandler() -+ { -+ _from = NULL; -+ _to = NULL; -+ _int_args = NULL; -+ _fp_args = NULL; -+ _fp_identifiers = NULL; -+ } -+}; ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" ++ %} + ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+IRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* thread, -+ Method* method, -+ intptr_t* from, -+ intptr_t* to)) -+ methodHandle m(thread, (Method*)method); -+ assert(m->is_native(), "sanity check"); ++ ins_pipe(pipe_slow); ++%} + -+ // handle arguments -+ SlowSignatureHandler ssh(m, (address)from, to); -+ ssh.iterate((uint64_t)UCONST64(-1)); ++instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // return result handler -+ return Interpreter::result_handler(m->result_type()); -+IRT_END -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp -new file mode 100644 -index 000000000..06342869f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" ++ %} + -+#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP -+#define CPU_RISCV_INTERPRETERRT_RISCV_HPP ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+// This is included in the middle of class Interpreter. -+// Do not include files here. 
++ ins_pipe(pipe_slow); ++%} + -+// native method calls ++instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+class SignatureHandlerGenerator: public NativeSignatureIterator { -+ private: -+ MacroAssembler* _masm; -+ unsigned int _num_reg_fp_args; -+ unsigned int _num_reg_int_args; -+ int _stack_offset; ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+ void pass_int(); -+ void pass_long(); -+ void pass_float(); -+ void pass_double(); -+ void pass_object(); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ Register next_gpr(); -+ FloatRegister next_fpr(); -+ int next_stack_offset(); ++ ins_pipe(pipe_slow); ++%} + -+ public: -+ // Creation -+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); -+ virtual ~SignatureHandlerGenerator() { -+ _masm = NULL; -+ } ++instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // Code generation -+ void generate(uint64_t fingerprint); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" ++ %} + -+ // Code generation support -+ static Register from(); -+ static Register to(); -+ static Register temp(); -+}; ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -new file mode 100644 -index 000000000..a169b8c5f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -0,0 +1,89 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ ins_pipe(pipe_slow); ++%} + -+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+private: ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" ++ %} + -+ // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+public: -+ // Each arch must define reset, save, restore -+ // These are used by objects that only care about: -+ // 1 - initializing a new state (thread creation, javaCalls) -+ // 2 - saving a current state (javaCalls) -+ // 3 - restoring an old state (javaCalls) ++ ins_pipe(pipe_slow); ++%} + -+ void clear(void) { -+ // clearing _last_Java_sp must be first -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ _last_Java_fp = NULL; -+ _last_Java_pc = NULL; -+ } ++instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ void copy(JavaFrameAnchor* src) { -+ // In order to make sure the transition state is valid for "this" -+ // We must clear _last_Java_sp before copying the rest of the new data -+ // -+ // Hack Alert: Temporary bugfix for 4717480/4721647 -+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp -+ // unless the value is changing -+ // -+ assert(src != NULL, "Src should not be NULL."); -+ if (_last_Java_sp != src->_last_Java_sp) { -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ } -+ _last_Java_fp = src->_last_Java_fp; -+ _last_Java_pc = src->_last_Java_pc; -+ // Must be last so profiler will always see valid frame if has_last_frame() is true -+ _last_Java_sp = src->_last_Java_sp; -+ } ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" ++ %} + -+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } -+ void make_walkable(JavaThread* thread); -+ void capture_last_Java_pc(void); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ins_pipe(pipe_slow); ++%} + -+ const address last_Java_pc(void) { return _last_Java_pc; } ++instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+private: ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+public: ++ ins_pipe(pipe_slow); ++%} + -+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ intptr_t* last_Java_fp(void) { return _last_Java_fp; } -+ // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..9bab8e78f +index 00000000000..3d3f4d4d774 --- /dev/null -+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -0,0 +1,193 @@ ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,441 @@ +/* -+ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18078,181 +18107,429 @@ index 000000000..9bab8e78f + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" ++#include "gc/z/zBarrier.inline.hpp" ++#include "gc/z/zBarrierSet.hpp" ++#include "gc/z/zBarrierSetAssembler.hpp" ++#include "gc/z/zBarrierSetRuntime.hpp" ++#include "gc/z/zThreadLocalData.hpp" +#include "memory/resourceArea.hpp" -+#include "prims/jniFastGetField.hpp" -+#include "prims/jvm_misc.hpp" -+#include "runtime/safepoint.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/z/c1/zBarrierSetC1.hpp" ++#endif // COMPILER1 ++#ifdef COMPILER2 ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#endif // COMPILER2 + ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#undef __ +#define __ masm-> + -+#define BUFFER_SIZE 30*wordSize ++void ZBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ if (!ZBarrierSet::barrier_needed(decorators, type)) { ++ // Barrier not needed ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } + -+// Instead of issuing a LoadLoad barrier we create an address -+// dependency between loads; this might be more efficient. ++ assert_different_registers(t1, src.base()); ++ assert_different_registers(t0, t1, dst); + -+// Common register usage: -+// x10/f10: result -+// c_rarg0: jni env -+// c_rarg1: obj -+// c_rarg2: jfield id ++ Label done; + -+static const Register robj = x13; -+static const Register rcounter = x14; -+static const Register roffset = x15; -+static const Register rcounter_addr = x16; -+static const Register result = x17; ++ // Load bad mask into temp register. ++ __ la(t0, src); ++ __ ld(t1, address_bad_mask_from_thread(xthread)); ++ __ ld(dst, Address(t0)); + -+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { -+ const char *name; -+ switch (type) { -+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; -+ case T_BYTE: name = "jni_fast_GetByteField"; break; -+ case T_CHAR: name = "jni_fast_GetCharField"; break; -+ case T_SHORT: name = "jni_fast_GetShortField"; break; -+ case T_INT: name = "jni_fast_GetIntField"; break; -+ case T_LONG: name = "jni_fast_GetLongField"; break; -+ case T_FLOAT: name = "jni_fast_GetFloatField"; break; -+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; -+ default: ShouldNotReachHere(); -+ name = NULL; // unreachable ++ // Test reference against bad mask. If mask bad, then we need to fix it up. 
++ __ andr(t1, dst, t1); ++ __ beqz(t1, done); ++ ++ __ enter(); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(dst)); ++ ++ if (c_rarg0 != dst) { ++ __ mv(c_rarg0, dst); + } -+ ResourceMark rm; -+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); -+ CodeBuffer cbuf(blob); -+ MacroAssembler* masm = new MacroAssembler(&cbuf); -+ address fast_entry = __ pc(); + -+ Label slow; -+ int32_t offset = 0; -+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); -+ __ addi(rcounter_addr, rcounter_addr, offset); ++ __ mv(c_rarg1, t0); + -+ Address safepoint_counter_addr(rcounter_addr, 0); -+ __ lwu(rcounter, safepoint_counter_addr); -+ // An even value means there are no ongoing safepoint operations -+ __ andi(t0, rcounter, 1); -+ __ bnez(t0, slow); -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + ++ // Make sure dst has the return value. ++ if (dst != x10) { ++ __ mv(dst, x10); ++ } + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); ++ __ pop_call_clobbered_registers_except(RegSet::of(dst)); ++ __ leave(); + -+ __ srli(roffset, c_rarg2, 2); // offset ++ __ bind(done); ++} + -+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); -+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler -+ __ add(roffset, robj, roffset); -+ switch (type) { -+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; -+ case T_BYTE: __ lb(result, Address(roffset, 0)); break; -+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; -+ case T_SHORT: __ lh(result, Address(roffset, 0)); break; -+ case T_INT: __ lw(result, Address(roffset, 0)); break; -+ case T_LONG: __ ld(result, Address(roffset, 0)); break; -+ case T_FLOAT: { -+ __ flw(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_w(result, f28); // f{31--0}-->x -+ break; -+ } -+ case T_DOUBLE: { -+ __ fld(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_d(result, f28); // d{63--0}-->x -+ break; ++#ifdef ASSERT ++ ++void ZBarrierSetAssembler::store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2) { ++ // Verify value ++ if (is_reference_type(type)) { ++ // Note that src could be noreg, which means we ++ // are storing null and can skip verification. ++ if (val != noreg) { ++ Label done; ++ ++ // tmp1 and tmp2 are often set to noreg. ++ RegSet savedRegs = RegSet::of(t0); ++ __ push_reg(savedRegs, sp); ++ ++ __ ld(t0, address_bad_mask_from_thread(xthread)); ++ __ andr(t0, val, t0); ++ __ beqz(t0, done); ++ __ stop("Verify oop store failed"); ++ __ should_not_reach_here(); ++ __ bind(done); ++ __ pop_reg(savedRegs, sp); + } -+ default: ShouldNotReachHere(); + } + -+ // counter_addr is address dependent on result. 
-+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ lw(t0, safepoint_counter_addr); -+ __ bne(rcounter, t0, slow); ++ // Store value ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++} + -+ switch (type) { -+ case T_FLOAT: __ fmv_w_x(f10, result); break; -+ case T_DOUBLE: __ fmv_d_x(f10, result); break; -+ default: __ mv(x10, result); break; -+ } -+ __ ret(); ++#endif // ASSERT + -+ slowcase_entry_pclist[count++] = __ pc(); -+ __ bind(slow); -+ address slow_case_addr; -+ switch (type) { -+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; -+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; -+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; -+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; -+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; -+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; -+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; -+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; -+ default: ShouldNotReachHere(); -+ slow_case_addr = NULL; // unreachable ++void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs) { ++ if (!is_oop) { ++ // Barrier not needed ++ return; + } + -+ { -+ __ enter(); -+ int32_t tmp_offset = 0; -+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); -+ __ jalr(x1, t0, tmp_offset); -+ __ leave(); -+ __ ret(); ++ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ ++ assert_different_registers(src, count, t0); ++ ++ __ push_reg(saved_regs, sp); ++ ++ if (count == c_rarg0 && src == c_rarg1) { ++ // exactly backwards!! 
++ __ xorr(c_rarg0, c_rarg0, c_rarg1); ++ __ xorr(c_rarg1, c_rarg0, c_rarg1); ++ __ xorr(c_rarg0, c_rarg0, c_rarg1); ++ } else { ++ __ mv(c_rarg0, src); ++ __ mv(c_rarg1, count); + } -+ __ flush(); + -+ return fast_entry; -+} ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); + ++ __ pop_reg(saved_regs, sp); + -+address JNI_FastGetField::generate_fast_get_boolean_field() { -+ return generate_fast_get_int_field0(T_BOOLEAN); ++ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +} + -+address JNI_FastGetField::generate_fast_get_byte_field() { -+ return generate_fast_get_int_field0(T_BYTE); -+} ++void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath) { ++ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); + -+address JNI_FastGetField::generate_fast_get_char_field() { -+ return generate_fast_get_int_field0(T_CHAR); ++ assert_different_registers(jni_env, robj, tmp); ++ ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); ++ ++ // Compute the offset of address bad mask from the field of jni_environment ++ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - ++ in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Load the address bad mask ++ __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); ++ ++ // Check address bad mask ++ __ andr(tmp, robj, tmp); ++ __ bnez(tmp, slowpath); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +} + -+address JNI_FastGetField::generate_fast_get_short_field() { -+ return generate_fast_get_int_field0(T_SHORT); ++#ifdef COMPILER2 ++ ++OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { ++ if (!OptoReg::is_reg(opto_reg)) { ++ return OptoReg::Bad; ++ } ++ ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_FloatRegister()) { ++ return opto_reg & ~1; ++ } ++ ++ return opto_reg; +} + -+address JNI_FastGetField::generate_fast_get_int_field() { -+ return generate_fast_get_int_field0(T_INT); ++#undef __ ++#define __ _masm-> ++ ++class ZSaveLiveRegisters { ++private: ++ MacroAssembler* const _masm; ++ RegSet _gp_regs; ++ FloatRegSet _fp_regs; ++ VectorRegSet _vp_regs; ++ ++public: ++ void initialize(ZLoadBarrierStubC2* stub) { ++ // Record registers that needs to be saved/restored ++ RegMaskIterator rmi(stub->live()); ++ while (rmi.has_next()) { ++ const OptoReg::Name opto_reg = rmi.next(); ++ if (OptoReg::is_reg(opto_reg)) { ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_Register()) { ++ _gp_regs += RegSet::of(vm_reg->as_Register()); ++ } else if (vm_reg->is_FloatRegister()) { ++ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ } else if (vm_reg->is_VectorRegister()) { ++ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); ++ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); ++ } else { ++ fatal("Unknown register type"); ++ } ++ } ++ } ++ ++ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated ++ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); ++ } ++ ++ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _gp_regs(), ++ _fp_regs(), ++ _vp_regs() { ++ // Figure out what registers to save/restore 
++ initialize(stub); ++ ++ // Save registers ++ __ push_reg(_gp_regs, sp); ++ __ push_fp(_fp_regs, sp); ++ __ push_vp(_vp_regs, sp); ++ } ++ ++ ~ZSaveLiveRegisters() { ++ // Restore registers ++ __ pop_vp(_vp_regs, sp); ++ __ pop_fp(_fp_regs, sp); ++ __ pop_reg(_gp_regs, sp); ++ } ++}; ++ ++class ZSetupArguments { ++private: ++ MacroAssembler* const _masm; ++ const Register _ref; ++ const Address _ref_addr; ++ ++public: ++ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _ref(stub->ref()), ++ _ref_addr(stub->ref_addr()) { ++ ++ // Setup arguments ++ if (_ref_addr.base() == noreg) { ++ // No self healing ++ if (_ref != c_rarg0) { ++ __ mv(c_rarg0, _ref); ++ } ++ __ mv(c_rarg1, zr); ++ } else { ++ // Self healing ++ if (_ref == c_rarg0) { ++ // _ref is already at correct place ++ __ la(c_rarg1, _ref_addr); ++ } else if (_ref != c_rarg1) { ++ // _ref is in wrong place, but not in c_rarg1, so fix it first ++ __ la(c_rarg1, _ref_addr); ++ __ mv(c_rarg0, _ref); ++ } else if (_ref_addr.base() != c_rarg0) { ++ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); ++ __ mv(c_rarg0, _ref); ++ __ la(c_rarg1, _ref_addr); ++ } else { ++ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); ++ if (_ref_addr.base() == c_rarg0) { ++ __ mv(t1, c_rarg1); ++ __ la(c_rarg1, _ref_addr); ++ __ mv(c_rarg0, t1); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } ++ } ++ ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != x10) { ++ __ mv(_ref, x10); ++ } ++ } ++}; ++ ++#undef __ ++#define __ masm-> ++ ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++ ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ int32_t offset = 0; ++ __ la_patchable(t0, stub->slow_path(), offset); ++ __ jalr(x1, t0, offset); ++ } ++ ++ // Stub exit ++ __ j(*stub->continuation()); +} + -+address JNI_FastGetField::generate_fast_get_long_field() { -+ return generate_fast_get_int_field0(T_LONG); ++#undef __ ++ ++#endif // COMPILER2 ++ ++#ifdef COMPILER1 ++#undef __ ++#define __ ce->masm()-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const { ++ assert_different_registers(xthread, ref->as_register(), t1); ++ __ ld(t1, address_bad_mask_from_thread(xthread)); ++ __ andr(t1, t1, ref->as_register()); +} + -+address JNI_FastGetField::generate_fast_get_float_field() { -+ return generate_fast_get_int_field0(T_FLOAT); ++void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const { ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ Register ref = stub->ref()->as_register(); ++ Register ref_addr = noreg; ++ Register tmp = noreg; ++ ++ if (stub->tmp()->is_valid()) { ++ // Load address into tmp register ++ ce->leal(stub->ref_addr(), stub->tmp()); ++ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++ } else { ++ // Address already in register ++ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ } ++ ++ assert_different_registers(ref, ref_addr, noreg); ++ ++ // Save x10 unless it is the result or tmp register ++ // Set up SP to accomodate parameters and maybe x10. 
++ if (ref != x10 && tmp != x10) { ++ __ sub(sp, sp, 32); ++ __ sd(x10, Address(sp, 16)); ++ } else { ++ __ sub(sp, sp, 16); ++ } ++ ++ // Setup arguments and call runtime stub ++ ce->store_parameter(ref_addr, 1); ++ ce->store_parameter(ref, 0); ++ ++ __ far_call(stub->runtime_stub()); ++ ++ // Verify result ++ __ verify_oop(x10, "Bad oop"); ++ ++ ++ // Move result into place ++ if (ref != x10) { ++ __ mv(ref, x10); ++ } ++ ++ // Restore x10 unless it is the result or tmp register ++ if (ref != x10 && tmp != x10) { ++ __ ld(x10, Address(sp, 16)); ++ __ add(sp, sp, 32); ++ } else { ++ __ add(sp, sp, 16); ++ } ++ ++ // Stub exit ++ __ j(*stub->continuation()); +} + -+address JNI_FastGetField::generate_fast_get_double_field() { -+ return generate_fast_get_int_field0(T_DOUBLE); ++#undef __ ++#define __ sasm-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const { ++ __ prologue("zgc_load_barrier stub", false); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(x10)); ++ ++ // Setup arguments ++ __ load_parameter(0, c_rarg0); ++ __ load_parameter(1, c_rarg1); ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(x10)); ++ ++ __ epilogue(); +} -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++ ++#undef __ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..96775e0db +index 00000000000..dc07ab635fe --- /dev/null -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,108 @@ ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,101 @@ +/* -+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -18276,98 +18553,91 @@ index 000000000..96775e0db + * + */ + -+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP -+#define CPU_RISCV_JNITYPES_RISCV_HPP ++#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP + -+#include "jni.h" -+#include "memory/allocation.hpp" -+#include "oops/oop.hpp" ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 + -+// This file holds platform-dependent routines used to write primitive jni -+// types to the array of arguments passed into JavaCalls::call ++#ifdef COMPILER1 ++class LIR_Assembler; ++class LIR_Opr; ++class StubAssembler; ++class ZLoadBarrierStubC1; ++#endif // COMPILER1 + -+class JNITypes : private AllStatic { -+ // These functions write a java primitive type (in native format) -+ // to a java stack slot array to be passed as an argument to JavaCalls:calls. -+ // I.e., they are functionally 'push' operations if they have a 'pos' -+ // formal parameter. Note that jlong's and jdouble's are written -+ // _in reverse_ of the order in which they appear in the interpreter -+ // stack. This is because call stubs (see stubGenerator_sparc.cpp) -+ // reverse the argument list constructed by JavaCallArguments (see -+ // javaCalls.hpp). 
++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 + ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +public: -+ // Ints are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } -+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } -+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++ virtual void load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread); + -+ // Longs are stored in native format in one JavaCallArgument slot at -+ // *(to+1). -+ static inline void put_long(jlong from, intptr_t *to) { -+ *(jlong*) (to + 1) = from; -+ } -+ -+ static inline void put_long(jlong from, intptr_t *to, int& pos) { -+ *(jlong*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++#ifdef ASSERT ++ virtual void store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2); ++#endif // ASSERT + -+ static inline void put_long(jlong *from, intptr_t *to, int& pos) { -+ *(jlong*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++ virtual void arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs); + -+ // Oops are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath); + -+ // Floats are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } -+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } -+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++#ifdef COMPILER1 ++ void generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const; + -+#undef _JNI_SLOT_OFFSET -+#define _JNI_SLOT_OFFSET 1 -+ // Doubles are stored in native word format in one JavaCallArgument -+ // slot at *(to+1). -+ static inline void put_double(jdouble from, intptr_t *to) { -+ *(jdouble*) (to + 1) = from; -+ } ++ void generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const; + -+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const; ++#endif // COMPILER1 + -+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); + -+ // The get_xxx routines, on the other hand, actually _do_ fetch -+ // java primitive types from the interpreter stack. -+ // No need to worry about alignment on Intel. 
-+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } -+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } -+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } -+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } -+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } -+#undef _JNI_SLOT_OFFSET ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 +}; + -+#endif // CPU_RISCV_JNITYPES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp new file mode 100644 -index 000000000..5d6078bb3 +index 00000000000..d14997790af --- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,5861 @@ ++++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +@@ -0,0 +1,212 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18391,8679 +18661,7893 @@ index 000000000..5d6078bb3 + */ + +#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "compiler/disassembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/accessDecorators.hpp" -+#include "oops/compressedOops.inline.hpp" -+#include "oops/klass.inline.hpp" -+#include "runtime/biasedLocking.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/jniHandles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#endif -+#ifdef COMPILER2 -+#include "oops/oop.hpp" -+#include "opto/compile.hpp" -+#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" -+#endif ++#include "gc/shared/gcLogPrecious.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/zGlobals.hpp" ++#include "runtime/globals.hpp" ++#include "runtime/os.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/powerOfTwo.hpp" + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#endif -+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++#ifdef LINUX ++#include ++#endif // LINUX + -+static void pass_arg0(MacroAssembler* masm, Register arg) { -+ if (c_rarg0 != arg) { -+ masm->mv(c_rarg0, arg); -+ } -+} ++// ++// The heap can have three different layouts, depending on the max heap size. 
++// ++// Address Space & Pointer Layout 1 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000014000000000 (20TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000010000000000 (16TB) ++// . . ++// +--------------------------------+ 0x00000c0000000000 (12TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000040000000000 (4TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 6 5 2 1 0 ++// +--------------------+----+-----------------------------------------------+ ++// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| ++// +--------------------+----+-----------------------------------------------+ ++// | | | ++// | | * 41-0 Object Offset (42-bits, 4TB address space) ++// | | ++// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) ++// | 0010 = Marked1 (Address view 8-12TB) ++// | 0100 = Remapped (Address view 16-20TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-46 Fixed (18-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 2 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000280000000000 (40TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// . . ++// +--------------------------------+ 0x0000180000000000 (24TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 7 6 3 2 0 ++// +------------------+-----+------------------------------------------------+ ++// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| ++// +-------------------+----+------------------------------------------------+ ++// | | | ++// | | * 42-0 Object Offset (43-bits, 8TB address space) ++// | | ++// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) ++// | 0010 = Marked1 (Address view 16-24TB) ++// | 0100 = Remapped (Address view 32-40TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-47 Fixed (17-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 3 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000500000000000 (80TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000400000000000 (64TB) ++// . . ++// +--------------------------------+ 0x0000300000000000 (48TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// . . 
++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 8 7 4 3 0 ++// +------------------+----+-------------------------------------------------+ ++// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| ++// +------------------+----+-------------------------------------------------+ ++// | | | ++// | | * 43-0 Object Offset (44-bits, 16TB address space) ++// | | ++// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) ++// | 0010 = Marked1 (Address view 32-48TB) ++// | 0100 = Remapped (Address view 64-80TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-48 Fixed (16-bits, always zero) ++// + -+static void pass_arg1(MacroAssembler* masm, Register arg) { -+ if (c_rarg1 != arg) { -+ masm->mv(c_rarg1, arg); ++// Default value if probing is not implemented for a certain platform: 128TB ++static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; ++// Minimum value returned, if probing fails: 64GB ++static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; ++ ++static size_t probe_valid_max_address_bit() { ++#ifdef LINUX ++ size_t max_address_bit = 0; ++ const size_t page_size = os::vm_page_size(); ++ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { ++ const uintptr_t base_addr = ((uintptr_t) 1U) << i; ++ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { ++ // msync suceeded, the address is valid, and maybe even already mapped. ++ max_address_bit = i; ++ break; ++ } ++ if (errno != ENOMEM) { ++ // Some error occured. This should never happen, but msync ++ // has some undefined behavior, hence ignore this bit. ++#ifdef ASSERT ++ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#else // ASSERT ++ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#endif // ASSERT ++ continue; ++ } ++ // Since msync failed with ENOMEM, the page might not be mapped. ++ // Try to map it, to see if the address is valid. 
++ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ munmap(result_addr, page_size); ++ } ++ if ((uintptr_t) result_addr == base_addr) { ++ // address is valid ++ max_address_bit = i; ++ break; ++ } + } -+} -+ -+static void pass_arg2(MacroAssembler* masm, Register arg) { -+ if (c_rarg2 != arg) { -+ masm->mv(c_rarg2, arg); ++ if (max_address_bit == 0) { ++ // probing failed, allocate a very high page and take that bit as the maximum ++ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; ++ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; ++ munmap(result_addr, page_size); ++ } + } ++ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); ++ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); ++#else // LINUX ++ return DEFAULT_MAX_ADDRESS_BIT; ++#endif // LINUX +} + -+static void pass_arg3(MacroAssembler* masm, Register arg) { -+ if (c_rarg3 != arg) { -+ masm->mv(c_rarg3, arg); -+ } ++size_t ZPlatformAddressOffsetBits() { ++ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; ++ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; ++ const size_t min_address_offset_bits = max_address_offset_bits - 2; ++ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); ++ const size_t address_offset_bits = log2i_exact(address_offset); ++ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +} + -+void MacroAssembler::align(int modulus) { -+ while (offset() % modulus != 0) { nop(); } ++size_t ZPlatformAddressMetadataShift() { ++ return ZPlatformAddressOffsetBits(); +} +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +new file mode 100644 +index 00000000000..f20ecd9b073 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { -+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); -+} ++#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP + -+// Implementation of call_VM versions ++const size_t ZPlatformGranuleSizeShift = 21; // 2MB ++const size_t ZPlatformHeapViews = 3; ++const size_t ZPlatformCacheLineSize = 64; + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions) { -+ call_VM_helper(oop_result, entry_point, 0, check_exceptions); -+} ++size_t ZPlatformAddressOffsetBits(); ++size_t ZPlatformAddressMetadataShift(); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 1, check_exceptions); -+} ++#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +new file mode 100644 +index 00000000000..6b6f87814a5 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +@@ -0,0 +1,233 @@ ++// ++// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 2, check_exceptions); -+} ++source_hpp %{ + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" + -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); ++%} + -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 3, check_exceptions); -+} ++source %{ + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); -+} -+ -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); -+} -+ -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { ++ if (barrier_data == ZLoadBarrierElided) { ++ return; ++ } ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); ++ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ bnez(tmp, *stub->entry(), true /* far */); ++ __ bind(*stub->continuation()); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); ++ __ j(*stub->entry()); ++ __ bind(*stub->continuation()); +} + -+// these are no-ops overridden by InterpreterMacroAssembler -+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} -+void MacroAssembler::check_and_handle_popframe(Register java_thread) {} ++%} + -+// Calls to C land -+// -+// When entering C land, the fp, & esp of the last Java frame have to be recorded -+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp -+// has to be reset to 0. This is required to allow proper stack traversal. 
-+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Register last_java_pc, -+ Register tmp) { ++// Load Pointer ++instruct zLoadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); ++ effect(TEMP dst); + -+ if (last_java_pc->is_valid()) { -+ sd(last_java_pc, Address(xthread, -+ JavaThread::frame_anchor_offset() + -+ JavaFrameAnchor::last_Java_pc_offset())); -+ } ++ ins_cost(4 * DEFAULT_COST); + -+ // determine last_java_sp register -+ if (last_java_sp == sp) { -+ mv(tmp, sp); -+ last_java_sp = tmp; -+ } else if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ format %{ "ld $dst, $mem, #@zLoadP" %} + -+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ins_encode %{ ++ const Address ref_addr (as_Register($mem$$base), $mem$$disp); ++ __ ld($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); ++ %} + -+ // last_java_fp is optional -+ if (last_java_fp->is_valid()) { -+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } -+} ++ ins_pipe(iload_reg_mem); ++%} + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ address last_java_pc, -+ Register tmp) { -+ assert(last_java_pc != NULL, "must provide a valid PC"); ++instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP_DEF res); + -+ la(tmp, last_java_pc); -+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ins_cost(2 * VOLATILE_REF_COST); + -+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); -+} ++ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" ++ "mv $res, $res == $oldval" %} + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Label &L, -+ Register tmp) { -+ if (L.is_bound()) { -+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); -+ } else { -+ L.add_patch_at(code(), locator()); -+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); -+ } -+} ++ ins_encode %{ ++ Label failed; ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ beqz($res$$Register, failed); ++ __ mv(t0, $oldval$$Register); ++ __ bind(failed); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); ++ __ andr(t1, t1, t0); ++ __ beqz(t1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ bind(good); ++ } ++ %} + -+void MacroAssembler::reset_last_Java_frame(bool clear_fp) { -+ // we must set sp to zero to clear frame -+ sd(zr, Address(xthread, 
JavaThread::last_Java_sp_offset())); ++ ins_pipe(pipe_slow); ++%} + -+ // must clear fp, so that compiled frames are not confused; it is -+ // possible that we need it only for debugging -+ if (clear_fp) { -+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } ++instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(KILL cr, TEMP_DEF res); + -+ // Always clear the pc because it could have been set by make_walkable() -+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); -+} ++ ins_cost(2 * VOLATILE_REF_COST); + -+void MacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // determine java_thread register -+ if (!java_thread->is_valid()) { -+ java_thread = xthread; -+ } -+ // determine last_java_sp register -+ if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" ++ "mv $res, $res == $oldval" %} + -+ // debugging support -+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); -+ assert(java_thread == xthread, "unexpected register"); ++ ins_encode %{ ++ Label failed; ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ beqz($res$$Register, failed); ++ __ mv(t0, $oldval$$Register); ++ __ bind(failed); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); ++ __ andr(t1, t1, t0); ++ __ beqz(t1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ bind(good); ++ } ++ %} + -+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); -+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ins_pipe(pipe_slow); ++%} + -+ // push java thread (becomes first argument of C function) -+ mv(c_rarg0, java_thread); ++instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res); + -+ // set last Java frame before call -+ assert(last_java_sp != fp, "can't use fp"); ++ ins_cost(2 * VOLATILE_REF_COST); + -+ Label l; -+ set_last_Java_frame(last_java_sp, fp, l, t0); ++ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} + -+ // do the call, remove parameters -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ 
cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(t0, t0, $res$$Register); ++ __ beqz(t0, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); ++ __ bind(good); ++ } ++ %} + -+ // reset last Java frame -+ // Only interpreter should have to clear fp -+ reset_last_Java_frame(true); ++ ins_pipe(pipe_slow); ++%} + -+ // C++ interp handles this in the interpreter -+ check_and_handle_popframe(java_thread); -+ check_and_handle_earlyret(java_thread); ++instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res); + -+ if (check_exceptions) { -+ // check for pending exceptions (java_thread is set upon return) -+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); -+ Label ok; -+ beqz(t0, ok); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); -+ jalr(x0, t0, offset); -+ bind(ok); -+ } ++ ins_cost(2 * VOLATILE_REF_COST); + -+ // get oop result if there is one and reset the value in the thread -+ if (oop_result->is_valid()) { -+ get_vm_result(oop_result, java_thread); -+ } -+} ++ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} + -+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { -+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); -+ verify_oop(oop_result, "broken oop in call_VM_base"); -+} ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(t0, t0, $res$$Register); ++ __ beqz(t0, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); ++ __ bind(good); ++ } ++ %} + -+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { -+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); -+} ++ ins_pipe(pipe_slow); ++%} + ++instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); ++ effect(TEMP_DEF prev, KILL cr); + -+void MacroAssembler::verify_oop(Register reg, const char* s) { -+ if (!VerifyOops) { return; } ++ 
ins_cost(2 * VOLATILE_REF_COST); + -+ // Pass register number to verify_oop_subroutine -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop: %s: %s", reg->name(), s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop {"); ++ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); ++ %} + -+ mv(c_rarg0, reg); // c_rarg0 : x10 -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ ins_pipe(pipe_serial); ++%} + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); ++ effect(TEMP_DEF prev, KILL cr); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ins_cost(VOLATILE_REF_COST); + -+ BLOCK_COMMENT("} verify_oop"); -+} ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} + -+void MacroAssembler::verify_oop_addr(Address addr, const char* s) { -+ if (!VerifyOops) { -+ return; -+ } ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); ++ %} ++ ins_pipe(pipe_serial); ++%} +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +new file mode 100644 +index 00000000000..2936837d951 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop_addr: %s", s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop_addr {"); ++#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++const int StackAlignmentInBytes = 16; + -+ if (addr.uses(sp)) { -+ la(x10, addr); -+ ld(x10, Address(x10, 4 * wordSize)); -+ } else { -+ ld(x10, addr); -+ } -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. ++const bool CCallingConventionRequiresIntsAsLongs = false; + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++// RISCV has adopted a multicopy atomic model closely following ++// that of ARMv8. ++#define CPU_MULTI_COPY_ATOMIC + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. ++#define DEOPTIMIZE_WHEN_PATCHING + -+ BLOCK_COMMENT("} verify_oop_addr"); -+} ++#define SUPPORTS_NATIVE_CX8 + -+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, -+ int extra_slot_offset) { -+ // cf. TemplateTable::prepare_invoke(), if (load_receiver). -+ int stackElementSize = Interpreter::stackElementSize; -+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); -+#ifdef ASSERT -+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); -+ assert(offset1 - offset == stackElementSize, "correct arithmetic"); -+#endif -+ if (arg_slot.is_constant()) { -+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); -+ } else { -+ assert_different_registers(t0, arg_slot.as_register()); -+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); -+ return Address(t0, offset); -+ } -+} ++#define SUPPORT_RESERVED_STACK_AREA + -+#ifndef PRODUCT -+extern "C" void findpc(intptr_t x); -+#endif ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + -+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) -+{ -+ // In order to get locks to work, we need to fake a in_VM state -+ if (ShowMessageBoxOnError) { -+ JavaThread* thread = JavaThread::current(); -+ JavaThreadState saved_state = thread->thread_state(); -+ thread->set_thread_state(_thread_in_vm); -+#ifndef PRODUCT -+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { -+ ttyLocker ttyl; -+ BytecodeCounter::print(); -+ } -+#endif -+ if (os::message_box(msg, "Execution stopped, print registers?")) { -+ ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016" PRIX64, pc); -+#ifndef PRODUCT -+ tty->cr(); -+ findpc(pc); -+ tty->cr(); -+#endif -+ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]); -+ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]); -+ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]); -+ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]); -+ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]); -+ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]); -+ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]); -+ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]); -+ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]); -+ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]); -+ 
tty->print_cr("x10 = 0x%016" PRIx64, regs[10]); -+ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]); -+ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]); -+ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]); -+ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]); -+ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]); -+ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]); -+ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]); -+ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]); -+ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]); -+ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]); -+ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]); -+ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]); -+ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]); -+ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]); -+ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]); -+ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]); -+ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]); -+ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]); -+ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]); -+ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]); -+ BREAKPOINT; -+ } -+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); -+ } else { -+ ttyLocker ttyl; -+ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); -+ assert(false, "DEBUG MESSAGE: %s", msg); -+ } -+} ++#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY + -+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { -+ Label done, not_weak; -+ beqz(value, done); // Use NULL as-is. ++#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +new file mode 100644 +index 00000000000..cbfc0583883 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Test for jweak tag. -+ andi(t0, value, JNIHandles::weak_tag_mask); -+ beqz(t0, not_weak); ++#ifndef CPU_RISCV_GLOBALS_RISCV_HPP ++#define CPU_RISCV_GLOBALS_RISCV_HPP + -+ // Resolve jweak. -+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, -+ Address(value, -JNIHandles::weak_tag_value), tmp, thread); -+ verify_oop(value); -+ j(done); ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+ bind(not_weak); -+ // Resolve (untagged) jobject. 
-+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); -+ verify_oop(value); -+ bind(done); -+} ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+void MacroAssembler::stop(const char* msg) { -+ address ip = pc(); -+ push_reg(RegSet::range(x0, x31), sp); -+ if(msg != NULL && ip != NULL) { -+ mv(c_rarg0, (uintptr_t)(address)msg); -+ mv(c_rarg1, (uintptr_t)(address)ip); -+ } else { -+ ShouldNotReachHere(); -+ } -+ mv(c_rarg2, sp); -+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); -+ jalr(c_rarg3); -+ ebreak(); -+} ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + -+void MacroAssembler::unimplemented(const char* what) { -+ const char* buf = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("unimplemented: %s", what); -+ buf = code_string(ss.as_string()); -+ } -+ stop(buf); -+} ++define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(intx, CodeEntryAlignment, 64); ++define_pd_global(intx, OptoLoopAlignment, 16); + -+void MacroAssembler::emit_static_call_stub() { -+ // CompiledDirectStaticCall::set_to_interpreted knows the -+ // exact layout of this stub. ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the ++// stack if compiled for unix and LP64. To pass stack overflow tests we need ++// 20 shadow pages. ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) + -+ mov_metadata(xmethod, (Metadata*)NULL); ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) + -+ // Jump to the entry point of the i2c stub. 
-+ int32_t offset = 0; -+ movptr_with_offset(t0, 0, offset); -+ jalr(x0, t0, offset); -+} ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + -+void MacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments, -+ Label *retaddr) { -+ int32_t offset = 0; -+ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp -+ movptr_with_offset(t0, entry_point, offset); -+ jalr(x1, t0, offset); -+ if (retaddr != NULL) { -+ bind(*retaddr); -+ } -+ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp -+} ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); + -+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { -+ call_VM_leaf_base(entry_point, number_of_arguments); -+} ++define_pd_global(bool, PreserveFramePointer, false); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_VM_leaf_base(entry_point, 1); -+} ++define_pd_global(uintx, TypeProfileLevel, 111); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ call_VM_leaf_base(entry_point, 2); -+} ++define_pd_global(bool, CompactStrings, true); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, -+ Register arg_1, Register arg_2) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ pass_arg2(this, arg_2); -+ call_VM_leaf_base(entry_point, 3); -+} ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, BytesPerLong); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 1); -+} ++define_pd_global(intx, InlineSmallCode, 1000); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ ++ /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ ++ product(bool, UseConservativeFence, true, \ ++ "Extend i for r and o for w in the pred/succ flags of fence;" \ ++ "Extend fence.i to fence.i + fence.") \ ++ product(bool, AvoidUnalignedAccesses, true, \ ++ "Avoid generating unaligned memory accesses") \ ++ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ ++ product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ ++ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ ++ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ ++ "Use RVV instructions for left/right shift of BigInteger") + -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 2); -+} ++#endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +new file mode 100644 +index 00000000000..cc93103dc55 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 3); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { -+ assert(arg_0 != c_rarg3, "smashed arg"); -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 4); ++int InlineCacheBuffer::ic_stub_code_size() { ++ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) ++ // 5: auipc + ld + j + address(2 * instruction_size) ++ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + -+void MacroAssembler::nop() { -+ addi(x0, x0, 0); -+} ++#define __ masm-> + -+void MacroAssembler::mv(Register Rd, Register Rs) { -+ if (Rd != Rs) { -+ addi(Rd, Rs, 0); -+ } -+} ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ assert_cond(code_begin != NULL && entry_point != NULL); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // Note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + -+void MacroAssembler::notr(Register Rd, Register Rs) { -+ xori(Rd, Rs, -1); ++ address start = __ pc(); ++ Label l; ++ __ ld(t1, l); ++ __ far_jump(ExternalAddress(entry_point)); ++ __ align(wordSize); ++ __ bind(l); ++ __ emit_int64((intptr_t)cached_value); ++ // Only need to invalidate the 1st two instructions - not the whole ic stub ++ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); ++ assert(__ pc() - start == ic_stub_code_size(), "must be"); +} + -+void MacroAssembler::neg(Register Rd, Register Rs) { -+ sub(Rd, x0, Rs); ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeJump* jump = nativeJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); +} + -+void MacroAssembler::negw(Register Rd, Register Rs) { -+ subw(Rd, x0, Rs); -+} + -+void MacroAssembler::sext_w(Register Rd, Register Rs) { -+ addiw(Rd, Rs, 0); ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // The word containing the cached value is at the end of this IC buffer ++ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); ++ void* o = (void*)*p; ++ return o; +} +diff 
--git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp +new file mode 100644 +index 00000000000..922a80f9f3e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::zext_b(Register Rd, Register Rs) { -+ andi(Rd, Rs, 0xFF); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" + -+void MacroAssembler::seqz(Register Rd, Register Rs) { -+ sltiu(Rd, Rs, 1); -+} ++#define __ _masm-> + -+void MacroAssembler::snez(Register Rd, Register Rs) { -+ sltu(Rd, x0, Rs); ++static int icache_flush(address addr, int lines, int magic) { ++ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); ++ return magic; +} + -+void MacroAssembler::sltz(Register Rd, Register Rs) { -+ slt(Rd, Rs, x0); -+} ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ address start = (address)icache_flush; ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; + -+void MacroAssembler::sgtz(Register Rd, Register Rs) { -+ slt(Rd, x0, Rs); -+} ++ // ICache::invalidate_range() contains explicit condition that the first ++ // call is invoked on the generated icache flush stub code range. ++ ICache::invalidate_range(start, 0); + -+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_s(Rd, Rs, Rs); ++ { ++ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ ret(); + } +} + -+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_s(Rd, Rs, Rs); -+} ++#undef __ +diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp +new file mode 100644 +index 00000000000..5bf40ca8204 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_s(Rd, Rs, Rs); -+} ++#ifndef CPU_RISCV_ICACHE_RISCV_HPP ++#define CPU_RISCV_ICACHE_RISCV_HPP + -+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_d(Rd, Rs, Rs); -+ } -+} ++// Interface for updating the instruction cache. Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. + -+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_d(Rd, Rs, Rs); -+} ++class ICache : public AbstractICache { ++public: ++ enum { ++ stub_size = 16, // Size of the icache flush stub in bytes ++ line_size = BytesPerWord, // conservative ++ log2_line_size = LogBytesPerWord // log2(line_size) ++ }; ++}; + -+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_d(Rd, Rs, Rs); -+} ++#endif // CPU_RISCV_ICACHE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +new file mode 100644 +index 00000000000..d12dcb2af19 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -0,0 +1,1940 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { -+ vmnand_mm(vd, vs, vs); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_riscv.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/powerOfTwo.hpp" + -+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { -+ vnsrl_wx(vd, vs, x0, vm); ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(t0, Address(t0, Method::const_offset())); ++ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ mv(t1, T_INT); ++ beq(t0, t1, done); ++ ++ // mask integer result to narrower return type. ++ mv(t1, T_BOOLEAN); ++ bne(t0, t1, notBool); ++ ++ andi(result, result, 0x1); ++ j(done); ++ ++ bind(notBool); ++ mv(t1, T_BYTE); ++ bne(t0, t1, notByte); ++ sign_extend(result, result, 8); ++ j(done); ++ ++ bind(notByte); ++ mv(t1, T_CHAR); ++ bne(t0, t1, notChar); ++ zero_extend(result, result, 16); ++ j(done); ++ ++ bind(notChar); ++ sign_extend(result, result, 16); ++ ++ // Nothing to do for T_INT ++ bind(done); ++ addw(result, result, zr); +} + -+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { -+ vfsgnjn_vv(vd, vs, vs); ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry != NULL, "Entry must have been generated by now"); ++ j(entry); +} + -+void MacroAssembler::la(Register Rd, const address &dest) { -+ int64_t offset = dest - pc(); -+ if (is_offset_in_range(offset, 32)) { -+ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit -+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); -+ } else { -+ movptr(Rd, dest); ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, ++ // it means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); ++ andi(t0, t1, JavaThread::popframe_pending_bit); ++ beqz(t0, L); ++ andi(t0, t1, JavaThread::popframe_processing_bit); ++ bnez(t0, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. 
++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(x10); ++ bind(L); + } +} + -+void MacroAssembler::la(Register Rd, const Address &adr) { -+ code_section()->relocate(pc(), adr.rspec()); -+ relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ switch(adr.getMode()) { -+ case Address::literal: { -+ if (rtype == relocInfo::none) { -+ mv(Rd, (intptr_t)(adr.target())); -+ } else { -+ movptr(Rd, adr.target()); -+ } ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ++ ld(x10, oop_addr); ++ sd(zr, oop_addr); ++ verify_oop(x10); + break; -+ } -+ case Address::base_plus_offset:{ -+ Register base = adr.base(); -+ int64_t offset = adr.offset(); -+ if (offset == 0 && Rd != base) { -+ mv(Rd, base); -+ } else if (offset != 0 && Rd != base) { -+ add(Rd, base, offset, Rd); -+ } else if (offset != 0 && Rd == base) { -+ Register tmp = (Rd == t0) ? t1 : t0; -+ add(base, base, offset, tmp); -+ } ++ case ltos: ++ ld(x10, val_addr); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lwu(x10, val_addr); ++ break; ++ case ftos: ++ flw(f10, val_addr); ++ break; ++ case dtos: ++ fld(f10, val_addr); ++ break; ++ case vtos: ++ /* nothing to do */ + break; -+ } + default: + ShouldNotReachHere(); + } ++ // Clean up tos value in the thread object ++ mvw(t0, (int) ilgl); ++ sw(t0, tos_addr); ++ sw(zr, val_addr); +} + -+void MacroAssembler::la(Register Rd, Label &label) { -+ la(Rd, target(label)); -+} -+ -+#define INSN(NAME) \ -+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ -+ NAME(Rs, zr, dest); \ -+ } \ -+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ -+ NAME(Rs, zr, l, is_far); \ -+ } \ -+ -+ INSN(beq); -+ INSN(bne); -+ INSN(blt); -+ INSN(ble); -+ INSN(bge); -+ INSN(bgt); -+ -+#undef INSN -+ -+// Float compare branch instructions -+ -+#define INSN(NAME, FLOATCMP, BRANCH) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_s(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_d(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } + -+ INSN(beq, feq, bnez); -+ INSN(bne, feq, beqz); -+#undef INSN ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit + ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. 
++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); ++ mv(t1, JvmtiThreadState::earlyret_pending); ++ bne(t0, t1, L); + -+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ -+ FLOATCMP2##_s(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ FLOATCMP1##_s(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ -+ FLOATCMP2##_d(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ FLOATCMP1##_d(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); ++ jr(x10); ++ bind(L); + } ++} + -+ INSN(ble, fle, flt); -+ INSN(blt, flt, fle); ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lhu(reg, Address(xbcp, bcp_offset)); ++ revb_h(reg, reg); ++} + -+#undef INSN ++void InterpreterMacroAssembler::get_dispatch() { ++ int32_t offset = 0; ++ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); ++ addi(xdispatch, xdispatch, offset); ++} + -+#define INSN(NAME, CMP) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ load_unsigned_short(index, Address(xbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ lwu(index, Address(xbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ xori(index, index, -1); ++ addw(index, index, zr); ++ } else if (index_size == sizeof(u1)) { ++ load_unsigned_byte(index, Address(xbcp, bcp_offset)); ++ } else { ++ ShouldNotReachHere(); + } ++} + -+ INSN(bgt, blt); -+ INSN(bge, ble); -+ -+#undef INSN -+ -+ -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd) { \ -+ csrr(Rd, CSR); \ -+ } -+ -+ INSN(rdinstret, CSR_INSTERT); -+ INSN(rdcycle, CSR_CYCLE); -+ INSN(rdtime, CSR_TIME); -+ INSN(frcsr, CSR_FCSR); -+ INSN(frrm, CSR_FRM); -+ INSN(frflags, CSR_FFLAGS); ++// Return ++// Rindex: index into constant pool ++// Rcache: address of cache entry - ConstantPoolCache::base_offset() ++// ++// A caller must add ConstantPoolCache::base_offset() to Rcache to get ++// the true address of the cache entry. 
++// ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ assert_different_registers(cache, xcpool); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry ++ // riscv already has the cache in xcpool so there is no need to ++ // install it in cache. Instead we pre-add the indexed offset to ++ // xcpool and return it in cache. All clients of this method need to ++ // be modified accordingly. ++ shadd(cache, index, xcpool, cache, 5); ++} + -+#undef INSN + -+void MacroAssembler::csrr(Register Rd, unsigned csr) { -+ csrrs(Rd, csr, x0); ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. ++ // n.b. unlike x86 cache already includes the index offset ++ la(bytecode, Address(cache, ++ ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::indices_offset())); ++ membar(MacroAssembler::AnyAny); ++ lwu(bytecode, bytecode); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); ++ srli(bytecode, bytecode, XLEN - BitsPerByte); +} + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ -+ OPFUN(x0, csr, Rs); \ -+ } -+ -+ INSN(csrw, csrrw); -+ INSN(csrs, csrrs); -+ INSN(csrc, csrrc); ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); ++ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ // construct pointer to cache entry ++ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++} + -+#undef INSN ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ -+ OPFUN(x0, csr, imm); \ -+ } ++ get_constant_pool(result); ++ // Load pointer for resolved_references[] objArray ++ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ shadd(result, index, result, index, LogBytesPerHeapOop); ++ load_heap_oop(result, Address(result, 0)); ++} + 
-+ INSN(csrwi, csrrwi); -+ INSN(csrsi, csrrsi); -+ INSN(csrci, csrrci); ++void InterpreterMacroAssembler::load_resolved_klass_at_offset( ++ Register cpool, Register index, Register klass, Register temp) { ++ shadd(temp, index, cpool, temp, LogBytesPerWord); ++ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ++ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses ++ shadd(klass, temp, klass, temp, LogBytesPerWord); ++ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++} + -+#undef INSN ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); + -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd, Register Rs) { \ -+ csrrw(Rd, CSR, Rs); \ -+ } ++ ld(method, Address(cache, method_offset)); // get f1 Method* ++} + -+ INSN(fscsr, CSR_FCSR); -+ INSN(fsrm, CSR_FRM); -+ INSN(fsflags, CSR_FFLAGS); ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// x10: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// x12, x15 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) { ++ assert(Rsub_klass != x10, "x10 holds superklass"); ++ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); ++ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + -+#undef INSN ++ // Profile the not-null value's klass. ++ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(Register Rs) { \ -+ NAME(x0, Rs); \ -+ } ++ // Do the check. ++ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + -+ INSN(fscsr); -+ INSN(fsrm); -+ INSN(fsflags); ++ // Profile the failure of the check. 
++ profile_typecheck_failed(x12); // blows x12 ++} + -+#undef INSN ++// Java Expression Stack + -+void MacroAssembler::fsrmi(Register Rd, unsigned imm) { -+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); -+ csrrwi(Rd, CSR_FRM, imm); ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, wordSize); +} + -+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { -+ csrrwi(Rd, CSR_FFLAGS, imm); ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, Address(esp, 0)); // lw do signed extended ++ addi(esp, esp, wordSize); +} + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(unsigned imm) { \ -+ NAME(x0, imm); \ -+ } -+ -+ INSN(fsrmi); -+ INSN(fsflagsi); -+ -+#undef INSN -+ -+#ifdef COMPILER2 ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi(esp, esp, -wordSize); ++ sd(r, Address(esp, 0)); ++} + -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, ++void InterpreterMacroAssembler::push_i(Register r) { ++ addi(esp, esp, -wordSize); ++ addw(r, r, zr); // signed extended ++ sd(r, Address(esp, 0)); ++} + -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi(esp, esp, -2 * wordSize); ++ sd(zr, Address(esp, wordSize)); ++ sd(r, Address(esp)); ++} + -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ flw(r, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld(r, esp, 0); ++ addi(esp, esp, 2 * 
Interpreter::stackElementSize); ++} + -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi(esp, esp, -wordSize); ++ fsw(r, Address(esp, 0)); +} + -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi(esp, esp, -2 * wordSize); ++ fsd(r, Address(esp, 0)); +} + -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: ++ pop_ptr(); ++ verify_oop(x10); + break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ pop_i(); ++ break; ++ case ltos: ++ pop_l(); ++ break; ++ case ftos: ++ pop_f(); ++ break; ++ case dtos: ++ pop_d(); ++ break; ++ case vtos: ++ /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); ++void InterpreterMacroAssembler::push(TosState state) { ++ switch (state) { ++ case atos: ++ verify_oop(x10); ++ push_ptr(); + break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ push_i(); ++ break; ++ case ltos: ++ push_l(); ++ break; ++ case ftos: ++ push_f(); ++ break; ++ case dtos: ++ push_d(); ++ break; ++ case vtos: ++ /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} -+#endif + -+void MacroAssembler::push_reg(Register Rs) -+{ -+ addi(esp, esp, 0 - wordSize); -+ sd(Rs, Address(esp, 0)); ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + -+void MacroAssembler::pop_reg(Register Rd) -+{ -+ ld(Rd, esp, 0); -+ addi(esp, esp, wordSize); ++void 
InterpreterMacroAssembler::load_float(Address src) { ++ flw(f10, src); +} + -+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { -+ int count = 0; -+ // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg --) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; -+ } -+ bitset <<= 1; -+ } -+ return count; ++void InterpreterMacroAssembler::load_double(Address src) { ++ fld(f10, src); +} + -+// Push lots of registers in the bit set supplied. Don't push sp. -+// Return the number of words pushed -+int MacroAssembler::push_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_pushed = 0;) -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; -+ -+ if (count) { -+ addi(stack, stack, - count * wordSize - offset); -+ } -+ for (int i = count - 1; i >= 0; i--) { -+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_pushed ++;) -+ } -+ -+ assert(words_pushed == count, "oops, pushed != count"); -+ -+ return count; ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { ++ // set sender sp ++ mv(x30, sp); ++ // record last_sp ++ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + -+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_popped = 0;) -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; -+ -+ for (int i = count - 1; i >= 0; i--) { -+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_popped ++;) -+ } -+ -+ if (count) { -+ addi(stack, stack, count * wordSize + offset); ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method) { ++ prepare_to_jump_from_interpreted(); ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(t0, run_compiled_code); ++ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ jr(t0); ++ bind(run_compiled_code); + } -+ assert(words_popped == count, "oops, popped != count"); + -+ return count; ++ ld(t0, Address(method, Method::from_interpreted_offset())); ++ jr(t0); +} + -+RegSet MacroAssembler::call_clobbered_registers() { -+ // Push integer registers x7, x10-x17, x28-x31. -+ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31); ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. amd64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} + -+void MacroAssembler::push_call_clobbered_registers() { -+ push_reg(call_clobbered_registers(), sp); -+ -+ // Push float registers f0-f7, f10-f17, f28-f31. 
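-+  // That is 8 + 8 + 4 = 20 caller-saved FP registers, which matches the 20-word stack adjustment below.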
-+ addi(sp, sp, - wordSize * 20); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } -+ } ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); +} + -+void MacroAssembler::pop_call_clobbered_registers() { -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll, ++ Register Rs) { ++ // Pay attention to the argument Rs, which is acquiesce in t0. ++ if (VerifyActivationFrameSize) { ++ Unimplemented(); + } -+ addi(sp, sp, wordSize * 20); -+ -+ pop_reg(call_clobbered_registers(), sp); -+} -+ -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(RegSet::range(x5, x31), sp); -+ -+ // float registers -+ addi(sp, sp, - 32 * wordSize); -+ for (int i = 0; i < 32; i++) { -+ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ if (verifyoop && state == atos) { ++ verify_oop(x10); + } + -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } -+} ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); -+ } ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(t1, Address(xthread, JavaThread::polling_word_offset())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); ++ bnez(t1, safepoint); ++ } ++ if (table == Interpreter::dispatch_table(state)) { ++ li(t1, Interpreter::distance_from_dispatch_table(state)); ++ add(t1, Rs, t1); ++ shadd(t1, t1, xdispatch, t1, 3); ++ } else { ++ mv(t1, (address)table); ++ shadd(t1, Rs, t1, Rs, 3); + } ++ ld(t1, Address(t1)); ++ jr(t1); + -+ // float registers -+ for (int i = 0; i < 32; i++) { -+ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ la(t1, ExternalAddress((address)safepoint_table)); ++ shadd(t1, Rs, t1, Rs, 3); ++ ld(t1, Address(t1)); ++ jr(t1); + } -+ addi(sp, sp, 32 * wordSize); ++} + -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(RegSet::range(x5, x31), sp); ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); +} + -+static int patch_offset_in_jal(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 20, 1), 
"offset is too large to be patched in one jal insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] -+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] -+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] -+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] -+ return NativeInstruction::instruction_size; // only one instruction ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), Rs); +} + -+static int patch_offset_in_conditional_branch(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] -+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] -+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] -+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] -+ return NativeInstruction::instruction_size; // only one instruction ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), false, Rs); +} + -+static int patch_offset_in_pc_relative(address branch, int64_t offset) { -+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load -+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] -+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ load_unsigned_byte(t0, Address(xbcp, step)); ++ add(xbcp, xbcp, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + -+static int patch_addr_in_movptr(address branch, address target) { -+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -+ int32_t lower = ((intptr_t)target << 35) >> 35; -+ int64_t upper = ((intptr_t)target - lower) >> 29; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] -+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(t0, Address(xbcp, 0)); ++ dispatch_base(state, table); +} + -+static int patch_imm_in_li64(address branch, address target) { -+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi -+ int64_t lower = (intptr_t)target & 0xffffffff; -+ lower = lower - ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ int64_t tmp_upper = upper, tmp_lower = upper; -+ tmp_lower = (tmp_lower << 52) >> 52; -+ tmp_upper -= tmp_lower; -+ tmp_upper >>= 12; -+ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), -+ // upper = target[63:32] + 1. -+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. -+ // Load the rest 32 bits. -+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. -+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. -+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. -+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++// remove activation ++// ++// Apply stack watermark barrier. ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers x13 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; + -+static int patch_imm_in_li32(address branch, int32_t target) { -+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw -+ int64_t upper = (intptr_t)target; -+ int32_t lower = (((int32_t)target) << 20) >> 20; -+ upper -= lower; -+ upper = (int32_t)upper; -+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. -+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, ++ // that would normally not be safe to use. Such bad returns into unsafe territory of ++ // the stack, will call InterpreterRuntime::at_unwind. 
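++  // If the poll word is armed we take slow_path below: save the TOS state, publish the
++  // last Java frame, call InterpreterRuntime::at_unwind, then rejoin fast_path.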
++ Label slow_path; ++ Label fast_path; ++ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); ++ j(fast_path); ++ ++ bind(slow_path); ++ push(state); ++ set_last_Java_frame(esp, fp, (address)pc(), t0); ++ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); ++ reset_last_Java_frame(true); ++ pop(state); + -+static long get_offset_of_jal(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ long offset = 0; -+ unsigned insn = *(unsigned*)insn_addr; -+ long val = (long)Assembler::sextract(insn, 31, 12); -+ offset |= ((val >> 19) & 0x1) << 20; -+ offset |= (val & 0xff) << 12; -+ offset |= ((val >> 8) & 0x1) << 11; -+ offset |= ((val >> 9) & 0x3ff) << 1; -+ offset = (offset << 43) >> 43; -+ return offset; -+} ++ bind(fast_path); + -+static long get_offset_of_conditional_branch(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ unsigned insn = *(unsigned*)insn_addr; -+ offset = (long)Assembler::sextract(insn, 31, 31); -+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); -+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); -+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); -+ offset = (offset << 41) >> 41; -+ return offset; -+} ++ // get the value of _do_not_unlock_if_synchronized into x13 ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ lbu(x13, do_not_unlock_if_synchronized); ++ sb(zr, do_not_unlock_if_synchronized); // reset the flag + -+static long get_offset_of_pc_relative(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. -+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. -+ offset = (offset << 32) >> 32; -+ return offset; -+} ++ // get method access flags ++ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(x12, Address(x11, Method::access_flags_offset())); ++ andi(t0, x12, JVM_ACC_SYNCHRONIZED); ++ beqz(t0, unlocked); + -+static address get_target_of_movptr(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. -+ return (address) target_address; -+} ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ bnez(x13, no_unlock); + -+static address get_target_of_li64(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. 
-+ return (address)target_address; -+} ++ // unlock monitor ++ push(state); // save result + -+static address get_target_of_li32(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. -+ return (address)target_address; -+} ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ la(c_rarg1, monitor); // address of first monitor + -+// Patch any kind of instruction; there may be several instructions. -+// Return the total length (in bytes) of the instructions. -+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { -+ assert_cond(branch != NULL); -+ int64_t offset = target - branch; -+ if (NativeInstruction::is_jal_at(branch)) { // jal -+ return patch_offset_in_jal(branch, offset); -+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne -+ return patch_offset_in_conditional_branch(branch, offset); -+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load -+ return patch_offset_in_pc_relative(branch, offset); -+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr -+ return patch_addr_in_movptr(branch, target); -+ } else if (NativeInstruction::is_li64_at(branch)) { // li64 -+ return patch_imm_in_li64(branch, target); -+ } else if (NativeInstruction::is_li32_at(branch)) { // li32 -+ int64_t imm = (intptr_t)target; -+ return patch_imm_in_li32(branch, (int32_t)imm); -+ } else { -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); -+ ShouldNotReachHere(); -+ } -+ return -1; -+} ++ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ bnez(x10, unlock); + -+address MacroAssembler::target_addr_for_insn(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal -+ offset = get_offset_of_jal(insn_addr); -+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne -+ offset = get_offset_of_conditional_branch(insn_addr); -+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load -+ offset = get_offset_of_pc_relative(insn_addr); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr -+ return get_target_of_movptr(insn_addr); -+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 -+ return get_target_of_li64(insn_addr); -+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 -+ return get_target_of_li32(insn_addr); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); + } else { -+ ShouldNotReachHere(); ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ j(unlocked); + } -+ return address(((uintptr_t)insn_addr + offset)); -+} + -+int MacroAssembler::patch_oop(address insn_addr, address o) { -+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode -+ // narrow OOPs by setting the upper 16 bits in the first -+ // instruction. -+ if (NativeInstruction::is_li32_at(insn_addr)) { -+ // Move narrow OOP -+ narrowOop n = CompressedOops::encode((oop)o); -+ return patch_imm_in_li32(insn_addr, (int32_t)n); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { -+ // Move wide OOP -+ return patch_addr_in_movptr(insn_addr, o); -+ } -+ ShouldNotReachHere(); -+ return -1; -+} ++ bind(unlock); ++ unlock_object(c_rarg1); ++ pop(state); + -+void MacroAssembler::reinit_heapbase() { -+ if (UseCompressedOops) { -+ if (Universe::is_fully_initialized()) { -+ mv(xheapbase, Universe::narrow_ptrs_base()); -+ } else { -+ int32_t offset = 0; -+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); -+ ld(xheapbase, Address(xheapbase, offset)); -+ } -+ } -+} ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); + -+void MacroAssembler::mv(Register Rd, Address dest) { -+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); -+ code_section()->relocate(pc(), dest.rspec()); -+ movptr(Rd, dest.target()); -+} -+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { -+ if (src.is_register()) { -+ mv(Rd, src.as_register()); -+ } else { -+ mv(Rd, src.as_constant()); -+ } -+} ++ // x10: Might contain return value + -+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { -+ andr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); + -+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { -+ orr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ld(c_rarg1, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ la(x9, monitor_block_bot); // points to word before bottom of ++ // monitor block + -+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { -+ xorr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ j(entry); + -+// Note: load_unsigned_short used to be called load_unsigned_word. 
-+int MacroAssembler::load_unsigned_short(Register dst, Address src) { -+ int off = offset(); -+ lhu(dst, src); -+ return off; -+} ++ // Entry already locked, need to throw exception ++ bind(exception); + -+int MacroAssembler::load_unsigned_byte(Register dst, Address src) { -+ int off = offset(); -+ lbu(dst, src); -+ return off; -+} ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); + -+int MacroAssembler::load_signed_short(Register dst, Address src) { -+ int off = offset(); -+ lh(dst, src); -+ return off; -+} ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. + -+int MacroAssembler::load_signed_byte(Register dst, Address src) { -+ int off = offset(); -+ lb(dst, src); -+ return off; -+} ++ push(state); ++ unlock_object(c_rarg1); ++ pop(state); + -+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { -+ switch (size_in_bytes) { -+ case 8: ld(dst, src); break; -+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; -+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; -+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } + -+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { -+ switch (size_in_bytes) { -+ case 8: sd(src, dst); break; -+ case 4: sw(src, dst); break; -+ case 2: sh(src, dst); break; -+ case 1: sb(src, dst); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ j(restart); ++ } + -+// rotate right with imm bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseZbb) { -+ rori(dst, src, shift); -+ return; ++ bind(loop); ++ // check if current entry is used ++ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); ++ ld(t0, Address(t0, 0)); ++ bnez(t0, exception); ++ ++ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry + } + -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} ++ bind(no_unlock); + -+// reverse bytes in halfword in lower 16 bits and sign-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) -+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ slli(Rd, Rs, 56); -+ srai(Rd, Rd, 48); // sign-extend -+ orr(Rd, Rd, tmp); -+} ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + -+// reverse bytes in lower word and sign-extend -+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) -+void 
MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 32); -+ return; ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_w_u(Rd, Rs, tmp1, tmp2); -+ slli(tmp2, Rd, 48); -+ srai(tmp2, tmp2, 32); // sign-extend -+ srli(Rd, Rd, 16); -+ orr(Rd, Rd, tmp2); -+} + -+// reverse bytes in halfword in lower 16 bits and zero-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ srli(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ andi(Rd, Rs, 0xFF); -+ slli(Rd, Rd, 8); -+ orr(Rd, Rd, tmp); -+} ++ // remove activation ++ // get sender esp ++ ld(t1, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; + -+// reverse bytes in halfwords in lower 32 bits and zero-extend -+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zero_extend(Rd, Rd, 32); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ srli(tmp2, Rs, 16); -+ revb_h_h_u(tmp2, tmp2, tmp1); -+ revb_h_h_u(Rd, Rs, tmp1); -+ slli(tmp2, tmp2, 16); -+ orr(Rd, Rd, tmp2); -+} ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ ble(t1, t0, no_reserved_zone_enabling); + -+// This method is only used for revb_h -+// Rd = Rs[47:0] Rs[55:48] Rs[63:56] -+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1); -+ srli(tmp1, Rs, 48); -+ andi(tmp2, tmp1, 0xFF); -+ slli(tmp2, tmp2, 8); -+ srli(tmp1, tmp1, 8); -+ orr(tmp1, tmp1, tmp2); -+ slli(Rd, Rs, 16); -+ orr(Rd, Rd, tmp1); -+} -+// reverse bytes in each halfword -+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] -+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ assert_different_registers(Rs, tmp1); -+ assert_different_registers(Rd, tmp1); -+ rev8(Rd, Rs); -+ zero_extend(tmp1, Rd, 32); -+ roriw(tmp1, tmp1, 16); -+ slli(tmp1, tmp1, 32); -+ srli(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zero_extend(Rd, Rd, 32); -+ orr(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_helper(Rd, Rs, tmp1, tmp2); -+ for (int i = 0; i < 3; ++i) { -+ revb_h_helper(Rd, Rd, tmp1, tmp2); -+ } -+} ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); + -+// reverse bytes in each word -+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] -+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ rori(Rd, 
Rd, 32); -+ return; ++ bind(no_reserved_zone_enabling); + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb(Rd, Rs, tmp1, tmp2); -+ ror_imm(Rd, Rd, 32); ++ ++ // restore sender esp ++ mv(esp, t1); ++ ++ // remove frame anchor ++ leave(); ++ // If we're returning to interpreted code we will shortly be ++ // adjusting SP to allow some space for ESP. If we're returning to ++ // compiled code the saved sender SP was saved in sender_sp, so this ++ // restores it. ++ andi(sp, esp, -16); +} + -+// reverse bytes in doubleword -+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] -+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ andi(tmp1, Rs, 0xFF); -+ slli(tmp1, tmp1, 8); -+ for (int step = 8; step < 56; step += 8) { -+ srli(tmp2, Rs, step); -+ andi(tmp2, tmp2, 0xFF); -+ orr(tmp1, tmp1, tmp2); -+ slli(tmp1, tmp1, 8); -+ } -+ srli(Rd, Rs, 56); -+ andi(Rd, Rd, 0xFF); -+ orr(Rd, tmp1, Rd); -+} -+ -+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { -+ if (is_imm_in_range(imm, 12, 0)) { -+ and_imm12(Rd, Rn, imm); ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + } else { -+ assert_different_registers(Rn, tmp); -+ mv(tmp, imm); -+ andr(Rd, Rn, tmp); -+ } -+} ++ Label done; + -+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { -+ ld(tmp1, adr); -+ if (src.is_register()) { -+ orr(tmp1, tmp1, src.as_register()); -+ } else { -+ if(is_imm_in_range(src.as_constant(), 12, 0)) { -+ ori(tmp1, tmp1, src.as_constant()); -+ } else { -+ assert_different_registers(tmp1, tmp2); -+ mv(tmp2, src.as_constant()); -+ orr(tmp1, tmp1, tmp2); -+ } -+ } -+ sd(tmp1, adr); -+} ++ const Register swap_reg = x10; ++ const Register tmp = c_rarg2; ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { -+ if (UseCompressedClassPointers) { -+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ if (Universe::narrow_klass_base() == NULL) { -+ slli(tmp, tmp, Universe::narrow_klass_shift()); -+ beq(trial_klass, tmp, L); -+ return; -+ } -+ decode_klass_not_null(tmp); -+ } else { -+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ } -+ beq(trial_klass, tmp, L); -+} ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); + -+// Move an oop into a register. immediate is true if we want -+// immediate instrcutions, i.e. we are not going to patch this -+// instruction while the code is being executed by another thread. In -+// that case we can use move immediates rather than the constant pool. 
-+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_oop_index(obj); -+ } else { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); -+ } -+#endif -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ if (!immediate) { -+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address -+ ld_constant(dst, Address(dummy, rspec)); -+ } else -+ mv(dst, Address((address)obj, rspec)); -+} ++ Label slow_case; + -+// Move a metadata address into a register. -+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_metadata_index(obj); -+ } else { -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = metadata_Relocation::spec(oop_index); -+ mv(dst, Address((address)obj, rspec)); -+} ++ // Load object pointer into obj_reg c_rarg3 ++ ld(obj_reg, Address(lock_reg, obj_offset)); + -+// Writes to stack successive pages until offset reached to check for -+// stack overflow + shadow pages. This clobbers tmp. -+void MacroAssembler::bang_stack_size(Register size, Register tmp) { -+ assert_different_registers(tmp, size, t0); -+ // Bang stack for total size given plus shadow page size. -+ // Bang one page at a time because large size can bang beyond yellow and -+ // red zones. -+ mv(t0, os::vm_page_size()); -+ Label loop; -+ bind(loop); -+ sub(tmp, sp, t0); -+ subw(size, size, t0); -+ sd(size, Address(tmp)); -+ bgtz(size, loop); ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp, obj_reg); ++ lwu(tmp, Address(tmp, Klass::access_flags_offset())); ++ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); ++ bnez(tmp, slow_case); ++ } + -+ // Bang down shadow pages too. -+ // At this point, (tmp-0) is the last address touched, so don't -+ // touch it again. (It was touched as (tmp-pagesize) but then tmp -+ // was post-decremented.) Skip this address by starting at i=1, and -+ // touch a few more pages below. N.B. It is important to touch all -+ // the way down to and including i=StackShadowPages. -+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ // this could be any sized move but this is can be a debugging crumb -+ // so the bigger the better. 
-+ sub(tmp, tmp, os::vm_page_size()); -+ sd(size, Address(tmp, 0)); -+ } -+} ++ // Load (object->mark() | 1) into swap_reg ++ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ ori(swap_reg, t0, 1); + -+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { -+ int32_t offset = 0; -+ _masm = masm; -+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); -+ _masm->lbu(t0, Address(t0, offset)); -+ _masm->beqz(t0, _label); -+} ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+SkipIfEqual::~SkipIfEqual() { -+ _masm->bind(_label); -+ _masm = NULL; -+} ++ assert(lock_offset == 0, ++ "displached header must be first word in BasicObjectLock"); + -+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ ld(dst, Address(xmethod, Method::const_offset())); -+ ld(dst, Address(dst, ConstMethod::constants_offset())); -+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); -+ ld(dst, Address(dst, mirror_offset)); -+ resolve_oop_handle(dst, tmp); -+} ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { -+ // OopHandle::resolve is an indirection. -+ assert_different_registers(result, tmp); -+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); -+} ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. 
++ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg ++ sub(swap_reg, swap_reg, sp); ++ li(t0, (int64_t)(7 - os::vm_page_size())); ++ andr(swap_reg, swap_reg, t0); + -+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, -+ Register dst, Address src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } -+} ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, Address(lock_reg, mark_offset)); ++ beqz(swap_reg, done); + -+void MacroAssembler::null_check(Register reg, int offset) { -+ if (needs_explicit_null_check(offset)) { -+ // provoke OS NULL exception if reg = NULL by -+ // accessing M[reg] w/o changing any registers -+ // NOTE: this is plenty to provoke a segv -+ ld(zr, Address(reg, 0)); -+ } else { -+ // nothing to do, (later) access of M[reg + offset] -+ // will provoke OS NULL exception if reg = NULL -+ } -+} ++ bind(slow_case); + -+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, -+ Address dst, Register src, -+ Register tmp1, Register tmp2, Register tmp3) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); -+ } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); -+ } -+} ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + -+// Algorithm must match CompressedOops::encode. -+void MacroAssembler::encode_heap_oop(Register d, Register s) { -+ verify_oop(s, "broken oop in encode_heap_oop"); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, s, LogMinObjAlignmentInBytes); -+ } else { -+ mv(d, s); -+ } -+ } else { -+ Label notNull; -+ sub(d, s, xheapbase); -+ bgez(d, notNull); -+ mv(d, zr); -+ bind(notNull); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, d, Universe::narrow_oop_shift()); -+ } ++ bind(done); + } +} + -+void MacroAssembler::load_klass(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ decode_klass_not_null(dst); -+ } else { -+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ } -+} + -+void MacroAssembler::store_klass(Register dst, Register src) { -+ // FIXME: Should this be a store release? concurrent gcs assumes -+ // klass length is valid if klass field is not null. -+ if (UseCompressedClassPointers) { -+ encode_klass_not_null(src); -+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. 
++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++ ++ if (UseHeavyMonitors) { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { -+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } -+} ++ Label done; + -+void MacroAssembler::store_klass_gap(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ // Store to klass gap in destination -+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); -+ } -+} ++ const Register swap_reg = x10; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+void MacroAssembler::decode_klass_not_null(Register r) { -+ decode_klass_not_null(r, r); -+} ++ save_bcp(); // Save in case of exception + -+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ // Convert from BasicObjectLock structure to object and BasicLock ++ // structure Store the BasicLock address into x10 ++ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ slli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ // Load oop into obj_reg(c_rarg3) ++ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } ++ // Free entry ++ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ assert_different_registers(src, xbase); -+ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ assert_different_registers(t0, xbase); -+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); -+ } else { -+ add(dst, xbase, src); -+ } -+ if (xbase == xheapbase) { reinit_heapbase(); } ++ // Load the old header from BasicLock structure ++ ld(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); + -+} ++ // Test for recursion ++ beqz(header_reg, done); + -+void MacroAssembler::encode_klass_not_null(Register r) { -+ encode_klass_not_null(r, r); -+} ++ // Atomic swap back the old header ++ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ // Call the runtime routine for slow case. 
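++  // We only fall through to here if the compare-and-swap above failed, i.e. the
++  // displaced header could not be restored atomically.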
++ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ srli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ bind(done); + -+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && -+ Universe::narrow_klass_shift() == 0) { -+ zero_extend(dst, src, 32); -+ return; ++ restore_bcp(); + } ++} + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } + -+ assert_different_registers(src, xbase); -+ mv(xbase, (intptr_t)Universe::narrow_klass_base()); -+ sub(dst, src, xbase); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ srli(dst, dst, LogKlassAlignmentInBytes); -+ } -+ if (xbase == xheapbase) { -+ reinit_heapbase(); -+ } ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ beqz(mdp, zero_continue); +} + -+void MacroAssembler::decode_heap_oop_not_null(Register r) { -+ decode_heap_oop_not_null(r, r); -+} ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ push_reg(0xc00, sp); // save x10, x11 + -+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { -+ assert(UseCompressedOops, "should only be used for compressed headers"); -+ assert(Universe::heap() != NULL, "java heap should be initialized"); -+ // Cannot assert, unverified entry point counts instructions (see .ad file) -+ // vtableStubs also counts instructions in pd_code_size_limit. -+ // Also do not verify_oop as this is called by verify_oop. -+ if (Universe::narrow_oop_shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ slli(dst, src, LogMinObjAlignmentInBytes); -+ if (Universe::narrow_oop_base() != NULL) { -+ add(dst, xheapbase, dst); -+ } -+ } else { -+ assert(Universe::narrow_oop_base() == NULL, "sanity"); -+ mv(dst, src); -+ } ++ // Test MDO to avoid the call if it is NULL. ++ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ beqz(x10, set_mdp); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); ++ // x10: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. 
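++  // Compute the absolute mdp from the returned data index:
++  //   mdp = MDO + MethodData::data_offset() + mdi
++  // and record it in the interpreter frame's mdp slot.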
++ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); ++ add(x10, x11, x10); ++ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ bind(set_mdp); ++ pop_reg(0xc00, sp); +} + -+void MacroAssembler::decode_heap_oop(Register d, Register s) { -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0 || d != s) { -+ slli(d, s, Universe::narrow_oop_shift()); -+ } -+ } else { -+ Label done; -+ mv(d, s); -+ beqz(s, done); -+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); -+ bind(done); -+ } -+ verify_oop(d, "broken oop in decode_heap_oop"); -+} ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ add(sp, sp, -4 * wordSize); ++ sd(x10, Address(sp, 0)); ++ sd(x11, Address(sp, wordSize)); ++ sd(x12, Address(sp, 2 * wordSize)); ++ sd(x13, Address(sp, 3 * wordSize)); ++ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue ++ get_method(x11); + -+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register tmp2, Register tmp3, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3); ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. ++ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); ++ ld(t0, Address(x11, Method::const_offset())); ++ add(x12, x12, t0); ++ la(x12, Address(x12, ConstMethod::codes_offset())); ++ beq(x12, xbcp, verify_continue); ++ // x10: method ++ // xbcp: bcp // xbcp == 22 ++ // x13: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ x11, xbcp, x13); ++ bind(verify_continue); ++ ld(x10, Address(sp, 0)); ++ ld(x11, Address(sp, wordSize)); ++ ld(x12, Address(sp, 2 * wordSize)); ++ ld(x13, Address(sp, 3 * wordSize)); ++ add(sp, sp, 4 * wordSize); ++#endif // ASSERT +} + -+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} + -+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); +} + -+// Used for storing NULLs. -+void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} + -+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java idiv and irem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. 
-+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow + ++ assert_different_registers(t1, t0, mdp_in, reg); + -+ int idivl_offset = offset(); -+ if (!want_remainder) { -+ divw(result, rs1, rs2); -+ } else { -+ remw(result, rs1, rs2); // result = rs1 % rs2; ++ Address addr1(mdp_in, constant); ++ Address addr2(t1, 0); ++ Address &addr = addr1; ++ if (reg != noreg) { ++ la(t1, addr1); ++ add(t1, t1, reg); ++ addr = addr2; + } -+ return idivl_offset; -+} + -+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java ldiv and lrem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) -+ -+ int idivq_offset = offset(); -+ if (!want_remainder) { -+ div(result, rs1, rs2); ++ if (decrement) { ++ ld(t0, addr); ++ addi(t0, t0, -DataLayout::counter_increment); ++ Label L; ++ bltz(t0, L); // skip store if counter underflow ++ sd(t0, addr); ++ bind(L); + } else { -+ rem(result, rs1, rs2); // result = rs1 % rs2; ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(t0, addr); ++ addi(t0, t0, DataLayout::counter_increment); ++ Label L; ++ blez(t0, L); // skip store if counter overflow ++ sd(t0, addr); ++ bind(L); + } -+ return idivq_offset; +} + -+// Look up the method for a megamorpic invkkeinterface call. -+// The target method is determined by . -+// The receiver klass is in recv_klass. -+// On success, the result will be in method_result, and execution falls through. -+// On failure, execution transfers to the given label. -+void MacroAssembler::lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& L_no_such_interface, -+ bool return_method) { -+ assert_different_registers(recv_klass, intf_klass, scan_tmp); -+ assert_different_registers(method_result, intf_klass, scan_tmp); -+ assert(recv_klass != method_result || !return_method, -+ "recv_klass can be destroyed when mehtid isn't needed"); -+ assert(itable_index.is_constant() || itable_index.as_register() == method_result, -+ "caller must be same register for non-constant itable index as for method"); -+ -+ // Compute start of first itableOffsetEntry (which is at the end of the vtable). -+ int vtable_base = in_bytes(Klass::vtable_start_offset()); -+ int itentry_off = itableMethodEntry::method_offset_in_bytes(); -+ int scan_step = itableOffsetEntry::size() * wordSize; -+ int vte_size = vtableEntry::size_in_bytes(); -+ assert(vte_size == wordSize, "else adjust times_vte_scale"); -+ -+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); -+ -+ // %%% Could store the aligned, prescaled offset in the klassoop. -+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); -+ add(scan_tmp, scan_tmp, vtable_base); -+ -+ if (return_method) { -+ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
-+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); -+ if (itable_index.is_register()) { -+ slli(t0, itable_index.as_register(), 3); -+ } else { -+ mv(t0, itable_index.as_constant() << 3); -+ } -+ add(recv_klass, recv_klass, t0); -+ if (itentry_off) { -+ add(recv_klass, recv_klass, itentry_off); -+ } -+ } -+ -+ Label search, found_method; -+ -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ beq(intf_klass, method_result, found_method); -+ bind(search); -+ // Check that the previous entry is non-null. A null entry means that -+ // the receiver class doens't implement the interface, and wasn't the -+ // same as when the caller was compiled. -+ beqz(method_result, L_no_such_interface, /* is_far */ true); -+ addi(scan_tmp, scan_tmp, scan_step); -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ bne(intf_klass, method_result, search); -+ -+ bind(found_method); -+ -+ // Got a hit. -+ if (return_method) { -+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); -+ add(method_result, recv_klass, scan_tmp); -+ ld(method_result, Address(method_result)); -+ } ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int flags_offset = in_bytes(DataLayout::flags_offset()); ++ // Set the flag ++ lbu(t1, Address(mdp_in, flags_offset)); ++ ori(t1, t1, flag_byte_constant); ++ sb(t1, Address(mdp_in, flags_offset)); +} + -+// virtual method calling -+void MacroAssembler::lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result) { -+ const int base = in_bytes(Klass::vtable_start_offset()); -+ assert(vtableEntry::size() * wordSize == 8, -+ "adjust the scaling in the code below"); -+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); + -+ if (vtable_index.is_register()) { -+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); -+ ld(method_result, Address(method_result, vtable_offset_in_bytes)); ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(t1, Address(mdp_in, offset)); ++ bne(value, t1, not_equal_continue); + } else { -+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ Address addr = form_address(recv_klass, /* base */ -+ vtable_offset_in_bytes, /* offset */ -+ 12, /* expect offset bits */ -+ method_result); /* temp reg */ -+ ld(method_result, addr); ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); + } +} + -+void MacroAssembler::membar(uint32_t order_constraint) { -+ if (!os::is_MP()) { return; } -+ -+ address prev = pc() - NativeMembar::instruction_size; -+ address last = code()->last_insn(); -+ -+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { -+ NativeMembar *bar = NativeMembar_at(prev); -+ // We are merging two memory barrier instructions. On RISCV we -+ // can do this simply by ORing them together. 
-+ bar->set_kind(bar->get_kind() | order_constraint); -+ BLOCK_COMMENT("merged membar"); -+ } else { -+ code()->set_last_insn(pc()); -+ -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; + -+ membar_mask_to_pred_succ(order_constraint, predecessor, successor); -+ fence(predecessor, successor); -+ } ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(t1, Address(mdp_in, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+void MacroAssembler::check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success) { -+ Label L_failure; -+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); -+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); -+ bind(L_failure); ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add(t1, mdp_in, reg); ++ ld(t1, Address(t1, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+// Write serialization page so VM thread can do a pseudo remote membar. -+// We use the current thread pointer to calculate a thread specific -+// offset to write to within the page. This minimizes bus traffic -+// due to cache line collision. -+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { -+ srli(tmp2, thread, os::get_serialize_page_shift_count()); -+ -+ int mask = os::vm_page_size() - sizeof(int); -+ andi(tmp2, tmp2, mask, tmp1); + -+ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); -+ membar(MacroAssembler::AnyAny); -+ sw(zr, Address(tmp1)); ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addi(mdp_in, mdp_in, (unsigned)constant); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+void MacroAssembler::safepoint_poll(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ int32_t offset = 0; -+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); -+ lwu(t0, Address(t0, offset)); -+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); -+ bnez(t0, slow_path); -+ } -+} + -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. 
-+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); + -+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, -+ Label &succeed, Label *fail) { -+ // oldv holds comparison value -+ // newv holds value to write in exchange -+ // addr identifies memory word to compare against/update -+ Label retry_load, nope; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ // and fail if it is not what we expect -+ lr_d(tmp, addr, Assembler::aqrl); -+ bne(tmp, oldv, nope); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_d(tmp, newv, addr, Assembler::rl); -+ beqz(tmp, succeed); -+ // retry so we only ever return after a load fails to compare -+ // ensures we don't return a stale value after a failed write. -+ j(retry_load); -+ // if the memory word differs we return it in oldv and signal a fail -+ bind(nope); -+ membar(AnyAny); -+ mv(oldv, tmp); -+ if (fail != NULL) { -+ j(*fail); -+ } ++ // save/restore across call_VM ++ addi(sp, sp, -2 * wordSize); ++ sd(zr, Address(sp, 0)); ++ sd(return_bci, Address(sp, wordSize)); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ ld(zr, Address(sp, 0)); ++ ld(return_bci, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); +} + -+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, -+ Label &succeed, Label *fail) { -+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); -+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); -+} ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+void MacroAssembler::load_reserved(Register addr, -+ enum operand_size size, -+ Assembler::Aqrl acquire) { -+ switch (size) { -+ case int64: -+ lr_d(t0, addr, acquire); -+ break; -+ case int32: -+ lr_w(t0, addr, acquire); -+ break; -+ case uint32: -+ lr_w(t0, addr, acquire); -+ zero_extend(t0, t0, 32); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); + -+void MacroAssembler::store_conditional(Register addr, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl release) { -+ switch (size) { -+ case int64: -+ sc_d(t0, new_val, addr, release); -+ break; -+ case int32: -+ case uint32: -+ sc_w(t0, new_val, addr, release); -+ break; -+ default: -+ ShouldNotReachHere(); ++ // We are taking a branch. Increment the taken count. ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ addi(bumped_count, bumped_count, DataLayout::counter_increment); ++ Label L; ++ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. 
so we use <= 0;
++ blez(bumped_count, L); // skip store if counter overflow,
++ sd(bumped_count, data);
++ bind(L);
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
++ bind(profile_continue);
+ }
+}
+
++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ // We are not taking a branch. Increment the not taken count.
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
++ // The method data pointer needs to be updated to correspond to
++ // the next bytecode
++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
++ bind(profile_continue);
+ }
+}
+
++void InterpreterMacroAssembler::profile_call(Register mdp) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ // We are making a call. Increment the count.
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
++ bind(profile_continue);
++ }
++}
+
++void InterpreterMacroAssembler::profile_final_call(Register mdp) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ // We are making a call. Increment the count.
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(mdp,
++ in_bytes(VirtualCallData::
++ virtual_call_data_size()));
++ bind(profile_continue);
+ }
+}
+
+
++void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
++ Register mdp,
++ Register reg2,
++ bool receiver_can_be_null) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ Label skip_receiver_profile;
++ if (receiver_can_be_null) {
++ Label not_null;
++ bnez(receiver, not_null); // a non-null receiver skips the null-receiver bookkeeping below
++ // We are making a call. Increment the count for null receiver.
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
++ j(skip_receiver_profile);
++ bind(not_null);
++ }
+
++ // Record the receiver type.
++ record_klass_in_profile(receiver, mdp, reg2, true);
++ bind(skip_receiver_profile);
+
++ // The method data pointer needs to be updated to reflect the new target.
+
++ update_mdp_by_constant(mdp,
++ in_bytes(VirtualCallData::
++ virtual_call_data_size()));
++ bind(profile_continue);
++ }
+}
+
++// This routine creates a state machine for updating the multi-row
++// type profile at a virtual call site (or other type-sensitive bytecode).
++// The machine visits each row (of receiver/count) until the receiver type
++// is found, or until it runs out of rows. At the same time, it remembers
++// the location of the first empty row. (An empty row records null for its
++// receiver, and can be allocated for a newly-observed receiver type.)
++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } + -+ // equal, succeed -+ if (result_as_bool) { -+ mv(result, 1); + } else { -+ mv(result, expected); -+ } -+ j(done); ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } + -+ // not equal, failed -+ bind(ne_done); -+ if (result_as_bool) { -+ mv(result, zr); -+ } else { -+ mv(result, t0); ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } -+ -+ bind(done); +} + -+void MacroAssembler::cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result) { -+ assert(size != int8 && size != int16, "unsupported operand size"); -+ -+ Label fail, done; -+ load_reserved(addr, size, acquire); -+ bne(t0, expected, fail); -+ store_conditional(addr, new_val, size, release); -+ bnez(t0, fail); -+ -+ // Success -+ mv(result, 1); -+ j(done); ++void InterpreterMacroAssembler::record_item_in_profile_helper( ++ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); + -+ // Fail -+ bind(fail); -+ mv(result, zr); ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) + -+ bind(done); -+} ++ // The item is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ j(done); ++ bind(next_test); + -+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ if (incr.is_register()) { \ -+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } else { \ -+ mv(t0, incr.as_constant()); \ -+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } \ -+ return; \ -+} ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. 
++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ j(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); + -+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) -+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+#undef ATOMIC_OP ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } + -+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ return; \ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ mv(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ j(done); ++ } +} + -+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) -+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) ++// Example state machine code for three profile rows: ++// # main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) then [ ++// row[0].incr() ++// goto done ++// ] ++// if (row[0].rec != NULL) then [ ++// # inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[1].rec != NULL) then [ ++// # degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// if (row[2].rec != NULL) then [ ++// count.incr() ++// goto done ++// ] # overflow ++// row[2].init(rec) ++// goto done ++// ] else [ ++// # remember row[1] is empty ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[1].init(rec) ++// goto done ++// ] ++// else [ ++// # remember row[0] is empty ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[0].init(rec) ++// goto done ++// ] ++// done: + -+#undef ATOMIC_XCHG ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; + -+#define ATOMIC_XCHGU(OP1, OP2) \ -+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ -+ atomic_##OP2(prev, newv, addr); \ -+ 
zero_extend(prev, prev, 32); \ -+ return; \ -+} ++ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + -+ATOMIC_XCHGU(xchgwu, xchgw) -+ATOMIC_XCHGU(xchgalwu, xchgalw) ++ bind(done); ++} + -+#undef ATOMIC_XCHGU ++void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. -+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} ++ for (uint row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; + -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); + -+ if (PrintBiasedLockingStatistics && counters == NULL) { -+ counters = BiasedLocking::counters(); -+ } ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ // The method data pointer needs to be updated to reflect the new target. 
++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ j(profile_continue); ++ bind(next_test); ++ } + -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern); -+ bnez(t0, cas_label); // don't care flag unless jumping to done -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } ++ update_mdp_for_ret(return_bci); + -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); ++ bind(profile_continue); + } ++} + -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. 
-+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } ++ bind(profile_continue); + } -+ j(done); ++} + -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; + -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + -+ // don't care flag unless jumping to done -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. 
Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); + -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); ++ bind (profile_continue); + } ++} + -+ bind(cas_label); ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ return null_check_offset; -+} ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + -+void MacroAssembler::far_jump(Address entry, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ jalr(x0, tmp, offset); -+ } else { -+ j(entry); ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); + } +} + -+void MacroAssembler::far_call(Address entry, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ jalr(x1, tmp, offset); // link -+ } else { -+ jal(entry); // link ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. 
++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); + } +} + -+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ bool must_load_sco = (super_check_offset == noreg); -+ if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a tmp or a register offset"); -+ } else { -+ assert_different_registers(sub_klass, super_klass, super_check_offset); -+ } ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } -+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } -+ assert(label_nulls <= 1, "at most one NULL in batch"); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ Address super_check_offset_addr(super_klass, sco_offset); ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); ++ Assembler::mul(index, index, reg2); ++ Assembler::add(index, index, t0); + -+ // Hacked jmp, which may only be used just before L_fallthrough. -+#define final_jmp(label) \ -+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ -+ else j(label) /*omit semi*/ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); + -+ // If the pointers are equal, we are done (e.g., String[] elements). -+ // This self-check enables sharing of secondary supertype arrays among -+ // non-primary types such as array-of-interface. Otherwise, each such -+ // type would need its own customized SSA. -+ // We move this check to the front fo the fast path because many -+ // type checks are in fact trivially successful in this manner, -+ // so we get a nicely predicted branch right at the start of the check. -+ beq(sub_klass, super_klass, *L_success); ++ // The method data pointer need to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); + -+ // Check the supertype display: -+ if (must_load_sco) { -+ lwu(tmp_reg, super_check_offset_addr); -+ super_check_offset = tmp_reg; ++ bind(profile_continue); + } -+ add(t0, sub_klass, super_check_offset); -+ Address super_check_addr(t0); -+ ld(t0, super_check_addr); // load displayed supertype ++} + -+ // Ths check has worked decisively for primary supers. -+ // Secondary supers are sought in the super_cache ('super_cache_addr'). -+ // (Secondary supers are interfaces and very deeply nested subtypes.) -+ // This works in the same check above because of a tricky aliasing -+ // between the super_Cache and the primary super dispaly elements. -+ // (The 'super_check_addr' can address either, as the case requires.) 
-+ // Note that the cache is updated below if it does not help us find -+ // what we need immediately. -+ // So if it was a primary super, we can just fail immediately. -+ // Otherwise, it's the slow path for us (no success at this point). ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + -+ beq(super_klass, t0, *L_success); -+ mv(t1, sc_offset); -+ if (L_failure == &L_fallthrough) { -+ beq(super_check_offset, t1, *L_slow_path); -+ } else { -+ bne(super_check_offset, t1, *L_failure, /* is_far */ true); -+ final_jmp(*L_slow_path); ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); + } + -+ bind(L_fallthrough); -+ -+#undef final_jmp -+} -+ -+// Scans count pointer sized words at [addr] for occurence of value, -+// generic -+void MacroAssembler::repne_scan(Register addr, Register value, Register count, -+ Register tmp) { -+ Label Lloop, Lexit; -+ beqz(count, Lexit); -+ bind(Lloop); -+ ld(tmp, addr); -+ beq(value, tmp, Lexit); -+ add(addr, addr, wordSize); -+ sub(count, count, 1); -+ bnez(count, Lloop); -+ bind(Lexit); -+} -+ -+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0); ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); + } -+#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg) + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ get_method(c_rarg1); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ } ++} + -+ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ // A couple of useful fields in sub_klass: -+ int ss_offset = in_bytes(Klass::secondary_supers_offset()); -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ Address secondary_supers_addr(sub_klass, ss_offset); -+ Address super_cache_addr( sub_klass, sc_offset); ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. 
If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. + -+ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ // template interpreter will leave the result on the top of the stack. ++ push(state); ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } + -+ // Do a linear scan of the secondary super-klass chain. -+ // This code is rarely used, so simplicity is a virtue here. -+ // The repne_scan instruction uses fixed registers, which we must spill. -+ // Don't worry too much about pre-existing connecitons with the input regs. ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ pop(state); ++ } ++} + -+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) -+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) + -+ RegSet pushed_registers; -+ if (!IS_A_TEMP(x12)) { -+ pushed_registers += x12; ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where) { ++ Label done; ++ if (!preloaded) { ++ lwu(tmp1, counter_addr); + } -+ if (!IS_A_TEMP(x15)) { -+ pushed_registers += x15; ++ add(tmp1, tmp1, increment); ++ sw(tmp1, counter_addr); ++ lwu(tmp2, mask); ++ andr(tmp1, tmp1, tmp2); ++ bnez(tmp1, done); ++ j(*where); // offset is too large so we have to use j instead of beqz here ++ bind(done); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore rbcp & rlocals pointer since these ++ // are callee saved registers and no blocking/ GC can happen ++ // in leaf calls. ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); + } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ if (super_klass != x10 || UseCompressedOops) { -+ if (!IS_A_TEMP(x10)) { -+ pushed_registers += x10; -+ } ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); + } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++// interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} + -+ push_reg(pushed_registers, sp); ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { ++ assert_different_registers(obj, tmp, t0, mdo_addr.base()); ++ Label update, next, none; + -+ // Get super_klass value into x10 (even if it was in x15 or x12) -+ mv(x10, super_klass); ++ verify_oop(obj); + -+#ifndef PRODUCT -+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); -+ Address pst_counter_addr(t1); -+ ld(t0, pst_counter_addr); -+ add(t0, t0, 1); -+ sd(t0, pst_counter_addr); -+#endif // PRODUCT ++ bnez(obj, update); ++ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); ++ j(next); + -+ // We will consult the secondary-super array. -+ ld(x15, secondary_supers_addr); -+ // Load the array length. -+ lwu(x12, Address(x15, Array::length_offset_in_bytes())); -+ // Skip to start of data. -+ add(x15, x15, Array::base_offset_in_bytes()); ++ bind(update); ++ load_klass(obj, obj); + -+ // Set t0 to an obvious invalid value, falling through by default -+ mv(t0, -1); -+ // Scan X12 words at [X15] for an occurrence of X10. -+ repne_scan(x15, x10, x12, t0); ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); // klass seen before, nothing to ++ // do. The unknown bit may have been ++ // set already but no need to check. + -+ // pop will restore x10, so we should use a temp register to keep its value -+ mv(t1, x10); ++ andi(t0, obj, TypeEntries::type_unknown); ++ bnez(t0, next); ++ // already unknown. Nothing to do anymore. + -+ // Unspill the temp. registers: -+ pop_reg(pushed_registers, sp); ++ ld(t0, mdo_addr); ++ beqz(t0, none); ++ li(tmp, (u1)TypeEntries::null_seen); ++ beq(t0, tmp, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); + -+ bne(t1, t0, *L_failure); ++ // different than before. Cannot keep accurate profile. ++ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); ++ j(next); + -+ // Success. Cache the super we found an proceed in triumph. -+ sd(super_klass, super_cache_addr); ++ bind(none); ++ // first time here. Set profile type. ++ sd(obj, mdo_addr); + -+ if (L_success != &L_fallthrough) { -+ j(*L_success); ++ bind(next); ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; + } + -+#undef IS_A_TEMP ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; + -+ bind(L_fallthrough); -+} ++ test_method_data_pointer(mdp, profile_continue); + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
-+void MacroAssembler::tlab_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); -+} ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + -+// Defines obj, preserves var_size_in_bytes -+void MacroAssembler::eden_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); -+} ++ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); ++ if (is_virtual) { ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } else { ++ li(tmp, (u1)DataLayout::call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } + ++ // calculate slot step ++ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); ++ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + -+// get_thread() can be called anywhere inside generated code so we -+// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code -+void MacroAssembler::get_thread(Register thread) { -+ // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::of(x10) + ra - thread; -+ push_reg(saved_regs, sp); ++ // calculate type step ++ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); ++ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + -+ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); -+ jalr(ra); -+ if (thread != c_rarg0) { -+ mv(thread, c_rarg0); -+ } ++ if (MethodData::profile_arguments()) { ++ Label done, loop, loopEnd, profileArgument, profileReturnType; ++ RegSet pushed_registers; ++ pushed_registers += x15; ++ pushed_registers += x16; ++ pushed_registers += x17; ++ Register mdo_addr = x15; ++ Register index = x16; ++ Register off_to_args = x17; ++ push_reg(pushed_registers, sp); + -+ // restore pushed registers -+ pop_reg(saved_regs, sp); -+} ++ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); ++ mv(t0, TypeProfileArgsLimit); ++ beqz(t0, loopEnd); + -+void MacroAssembler::load_byte_map_base(Register reg) { -+ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ mv(reg, (uint64_t)byte_map_base); -+} ++ mv(index, zr); // index < TypeProfileArgsLimit ++ bind(loop); ++ bgtz(index, profileReturnType); ++ li(t0, (int)MethodData::profile_return()); ++ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false ++ bind(profileReturnType); ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ mv(t1, - TypeStackSlotEntries::per_arg_count()); ++ mul(t1, index, t1); ++ add(tmp, tmp, t1); ++ li(t1, TypeStackSlotEntries::per_arg_count()); ++ add(t0, mdp, off_to_args); ++ blt(tmp, t1, done); + -+void MacroAssembler::la_patchable(Register reg1, const 
Address &dest, int32_t &offset) { -+ relocInfo::relocType rtype = dest.rspec().reloc()->type(); -+ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); -+ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); -+ unsigned long dest_address = (uintptr_t)dest.target(); -+ long offset_low = dest_address - low_address; -+ long offset_high = dest_address - high_address; ++ bind(profileArgument); + -+ assert(is_valid_riscv64_address(dest.target()), "bad address"); -+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); ++ ld(tmp, Address(callee, Method::const_offset())); ++ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(t0, mdp, t0); ++ ld(t0, Address(t0)); ++ sub(tmp, tmp, t0); ++ addi(tmp, tmp, -1); ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); + -+ code_section()->relocate(pc(), dest.rspec()); -+ // RISC-V doesn't compute a page-aligned address, in order to partially -+ // compensate for the use of *signed* offsets in its base+disp12 -+ // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2K)). -+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { -+ int64_t distance = dest.target() - pc(); -+ auipc(reg1, (int32_t)distance + 0x800); -+ offset = ((int32_t)distance << 20) >> 20; -+ } else { -+ movptr_with_offset(reg1, dest.target(), offset); -+ } -+} ++ li(t0, argument_type_offset0); ++ li(t1, type_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(mdo_addr, mdp, t0); ++ Address mdo_arg_addr(mdo_addr, 0); ++ profile_obj_type(tmp, mdo_arg_addr, t1); + -+void MacroAssembler::build_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); -+ sub(sp, sp, framesize); -+ sd(fp, Address(sp, framesize - 2 * wordSize)); -+ sd(ra, Address(sp, framesize - wordSize)); -+ if (PreserveFramePointer) { add(fp, sp, framesize); } -+} ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addi(off_to_args, off_to_args, to_add); + -+void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); -+ ld(fp, Address(sp, framesize - 2 * wordSize)); -+ ld(ra, Address(sp, framesize - wordSize)); -+ add(sp, sp, framesize); -+} ++ // increment index by 1 ++ addi(index, index, 1); ++ li(t1, TypeProfileArgsLimit); ++ blt(index, t1, loop); ++ bind(loopEnd); + -+void MacroAssembler::reserved_stack_check() { -+ // testing if reserved zone needs to be enabled -+ Label no_reserved_zone_enabling; ++ if (MethodData::profile_return()) { ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ } + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ bltu(sp, t0, no_reserved_zone_enabling); ++ add(t0, mdp, off_to_args); ++ bind(done); ++ mv(mdp, t0); + -+ enter(); // RA and FP are live. 
-+ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); -+ jalr(x1, t0, offset); -+ leave(); ++ // unspill the clobbered registers ++ pop_reg(pushed_registers, sp); + -+ // We have already removed our own frame. -+ // throw_delayed_StackOverflowError will think that it's been -+ // called by our caller. -+ offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); -+ jalr(x0, t0, offset); -+ should_not_reach_here(); ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. ++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ } ++ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } + -+ bind(no_reserved_zone_enabling); -+} ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one + -+// Move the address of the polling page into dest. -+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(dest, Address(xthread, Thread::polling_page_offset())); -+ } else { -+ uint64_t align = (uint64_t)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ la_patchable(dest, Address(page, rtype), offset); ++ bind(profile_continue); + } +} + -+// Move the address of the polling page into dest. -+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { -+ int32_t offset = 0; -+ get_polling_page(dest, page, offset, rtype); -+ read_polling_page(dest, offset, rtype); -+} ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; + -+// Read the polling page. The address of the polling page must -+// already be in r. -+void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -+ code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(r, offset)); -+} ++ test_method_data_pointer(mdp, profile_continue); + -+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert (UseCompressedOops, "should only be used for compressed oops"); -+ assert (Universe::heap() != NULL, "java heap should be initialized"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. 
We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lbu(t0, Address(xbcp, 0)); ++ li(tmp, (u1)Bytecodes::_invokedynamic); ++ beq(t0, tmp, do_profile); ++ li(tmp, (u1)Bytecodes::_invokehandle); ++ beq(t0, tmp, do_profile); ++ get_method(tmp); ++ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); ++ li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); ++ bne(t0, t1, profile_continue); ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ mv(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr, t1); ++ ++ bind(profile_continue); + } -+#endif -+ int oop_index = oop_recorder()->find_index(obj); -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(pc(), rspec); -+ li32(dst, 0xDEADBEEF); -+ zero_extend(dst, dst, 32); +} + -+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { -+ assert (UseCompressedClassPointers, "should only be used for compressed headers"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ int index = oop_recorder()->find_index(k); -+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { ++ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; + -+ RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(pc(), rspec); -+ narrowKlass nk = Klass::encode_klass(k); -+ li32(dst, nk); -+ zero_extend(dst, dst, 32); -+} ++ test_method_data_pointer(mdp, profile_continue); + -+// Maybe emit a call via a trampoline. If the code cache is small -+// trampolines won't be emitted. -+address MacroAssembler::trampoline_call(Address entry) { -+ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); -+ assert(entry.rspec().type() == relocInfo::runtime_call_type || -+ entry.rspec().type() == relocInfo::opt_virtual_call_type || -+ entry.rspec().type() == relocInfo::static_call_type || -+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ srli(tmp2, tmp1, 31); ++ bnez(tmp2, profile_continue); // i.e. sign bit set + -+ // We need a trampoline if branches are far. -+ if (far_branches()) { -+ bool in_scratch_emit_size = false; -+#ifdef COMPILER2 -+ // We don't want to emit a trampoline if C2 is generating dummy -+ // code during its branch shortening phase. 
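The parameters check just above loads the 32-bit di offset with lwu (zero-extended) and then isolates its sign bit with srli by 31, so "offset is negative" and "bit 31 is set" are the same test. A one-line sketch of that equivalence; parameters_not_profiled is an illustrative name:

    #include <cstdint>

    // A negative parameters_type_data_di offset means "no parameters profiled";
    // after a zero-extending 32-bit load that is exactly the top bit of the word.
    static bool parameters_not_profiled(uint32_t di_offset) {
      return (di_offset >> 31) != 0;   // same as (int32_t)di_offset < 0
    }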
-+ CompileTask* task = ciEnv::current()->task(); -+ in_scratch_emit_size = -+ (task != NULL && is_c2_compile(task->comp_level()) && -+ Compile::current()->in_scratch_emit_size()); -+#endif -+ if (!in_scratch_emit_size) { -+ address stub = emit_trampoline_stub(offset(), entry.target()); -+ if (stub == NULL) { -+ postcond(pc() == badAddress); -+ return NULL; // CodeCache is full -+ } -+ } -+ } -+ -+ address call_pc = pc(); -+ relocate(entry.rspec()); -+ if (!far_branches()) { -+ jal(entry.target()); -+ } else { -+ jal(pc()); -+ } -+ -+ postcond(pc() != badAddress); -+ return call_pc; -+} -+ -+address MacroAssembler::ic_call(address entry, jint method_index) { -+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); -+ movptr(t1, (address)Universe::non_oop_word()); -+ assert_cond(entry != NULL); -+ return trampoline_call(Address(entry, rh)); -+} -+ -+// Emit a trampoline stub for a call to a target which is too far away. -+// -+// code sequences: -+// -+// call-site: -+// branch-and-link to or -+// -+// Related trampoline stub for this call site in the stub section: -+// load the call target from the constant pool -+// branch (RA still points to the call site above) -+ -+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, -+ address dest) { -+ // Max stub size: alignment nop, TrampolineStub. -+ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size); -+ if (stub == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } -+ -+ // Create a trampoline stub relocation which relates this trampoline stub -+ // with the call instruction at insts_call_instruction_offset in the -+ // instructions code-section. ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add(mdp, mdp, tmp1); ++ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + -+ // make sure 4 byte aligned here, so that the destination address would be -+ // 8 byte aligned after 3 intructions -+ while (offset() % wordSize == 0) { nop(); } ++ Label loop; ++ bind(loop); + -+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + -+ insts_call_instruction_offset)); -+ const int stub_start_offset = offset(); ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ int per_arg_scale = exact_log2(DataLayout::cell_size); ++ add(t0, mdp, off_base); ++ add(t1, mdp, type_base); + -+ // Now, create the trampoline stub's code: -+ // - load the call -+ // - call -+ Label target; -+ ld(t0, target); // auipc + ld -+ jr(t0); // jalr -+ bind(target); -+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, -+ "should be"); -+ emit_int64((intptr_t)dest); ++ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); ++ // load offset on the stack from the slot for this parameter ++ ld(tmp2, Address(tmp2, 0)); ++ neg(tmp2, tmp2); + -+ const address stub_start_addr = addr_at(stub_start_offset); ++ // read the parameter from the local area ++ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ++ ld(tmp2, Address(tmp2, 0)); + -+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ // profile the parameter ++ shadd(t1, tmp1, t1, t0, per_arg_scale); ++ Address arg_type(t1, 0); ++ profile_obj_type(tmp2, arg_type, tmp3); + -+ end_a_stub(); -+ return stub_start_addr; -+} ++ // go to next parameter ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ bgez(tmp1, loop); + -+Address MacroAssembler::add_memory_helper(const Address dst) { -+ switch (dst.getMode()) { -+ case Address::base_plus_offset: -+ // This is the expected mode, although we allow all the other -+ // forms below. 
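The loop above walks the parameter cells from the last entry down to index 0, stepping by per_arg_count and forming each cell address with shadd-style scaled addressing, base + (index << log2(DataLayout::cell_size)). A plain C++ sketch of the same walk; walk_parameters_down, stack_slot_base and type_base are illustrative stand-ins for the real offsets:

    #include <cstddef>
    #include <cstdint>

    // Index arithmetic of the walk-down loop; the profiling itself is elided.
    static void walk_parameters_down(const uint8_t* mdp, ptrdiff_t stack_slot_base,
                                     ptrdiff_t type_base, int array_len,
                                     int per_arg_count, int cell_size_log2) {
      for (int idx = array_len - per_arg_count; idx >= 0; idx -= per_arg_count) {
        const uint8_t* slot_cell = mdp + stack_slot_base + ((ptrdiff_t)idx << cell_size_log2);
        const uint8_t* type_cell = mdp + type_base       + ((ptrdiff_t)idx << cell_size_log2);
        (void)slot_cell;   // the real code reads the stack slot, loads the local,
        (void)type_cell;   // and calls profile_obj_type on the type cell
      }
    }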
-+ return form_address(dst.base(), dst.offset(), 12, t1); -+ default: -+ la(t1, dst); -+ return Address(t1); ++ bind(profile_continue); + } +} + -+void MacroAssembler::increment(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ ld(t0, adr); -+ add(t0, t0, value, t1); -+ sd(t0, adr); ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ bnez(mcs, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); +} + -+void MacroAssembler::incrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ lwu(t0, adr); -+ addw(t0, t0, value, t1); -+ sw(t0, adr); ++#ifdef ASSERT ++void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit) { ++ Label L; ++ andi(t0, access_flags, flag_bits); ++ if (stop_by_hit) { ++ beqz(t0, L); ++ } else { ++ bnez(t0, L); ++ } ++ stop(msg); ++ bind(L); +} + -+void MacroAssembler::decrement(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); -+ ld(t0, adr); -+ sub(t0, t0, value, t1); -+ sd(t0, adr); ++void InterpreterMacroAssembler::verify_frame_setup() { ++ Label L; ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ld(t0, monitor_block_top); ++ beq(esp, t0, L); ++ stop("broken stack frame setup in interpreter"); ++ bind(L); +} ++#endif +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +new file mode 100644 +index 00000000000..4d8cb086f82 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -0,0 +1,285 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::decrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); -+ lwu(t0, adr); -+ subw(t0, t0, value, t1); -+ sw(t0, adr); -+} ++#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP ++#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { -+ assert_different_registers(src1, t0); -+ int32_t offset; -+ la_patchable(t0, src2, offset); -+ ld(t0, Address(t0, offset)); -+ beq(src1, t0, equal); -+} ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" + -+void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_equals(this, obj1, obj2, equal, is_far); -+} ++// This file specializes the assember with interpreter-specific macros + -+void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_nequals(this, obj1, obj2, nequal, is_far); -+} ++typedef ByteSize (*OffsetFunction)(uint); + -+#ifdef COMPILER2 -+// Set dst NaN if either source is NaN. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ Label Ldone; -+ fsflags(zr); -+ if (is_double) { -+ if (is_min) { -+ fmin_d(dst, src1, src2); -+ } else { -+ fmax_d(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_d(zr, src1, src2); -+ } else { -+ if (is_min) { -+ fmin_s(dst, src1, src2); -+ } else { -+ fmax_s(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_s(zr, src1, src2); -+ } -+ frflags(t0); -+ beqz(t0, Ldone); ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; + -+ // Src1 or src2 must be NaN here. Set dst NaN. -+ if (is_double) { -+ fadd_d(dst, src1, src2); -+ } else { -+ fadd_s(dst, src1, src2); -+ } -+ bind(Ldone); -+} ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); + -+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); + -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ mv(elem_per_word, wordSize / elem_size); ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, ++ bool generate_poll = false, Register Rs = t0); + -+ BLOCK_COMMENT("arrays_equals {"); ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} ++ virtual ~InterpreterMacroAssembler() {} + -+ // if (a1 == a2), return true -+ oop_equal(a1, a2, SAME); ++ void load_earlyret_value(TosState state); + -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++ void jump_to_entry(address entry); + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } + -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); ++ void restore_bcp() { ++ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } + -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); ++ void restore_locals() { ++ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ } + -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); ++ void restore_constant_pool_cache() { ++ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ } + -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); ++ void get_dispatch(); + -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ } + -+ BLOCK_COMMENT("} array_equals"); -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ void get_const(Register reg) { ++ get_method(reg); ++ ld(reg, Address(reg, in_bytes(Method::const_offset()))); ++ } + -+// Compare Strings ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ } + -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. 
For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } + -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++ } + -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); + -+ BLOCK_COMMENT("string_equals {"); ++ // Load cpool->resolved_references(index). ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + -+ beqz(cnt1, SAME); -+ mv(result, false); ++ // Load cpool->resolved_klass_at(index). ++ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ blez(cnt1, SHORT); ++ void load_resolved_method_at_index(int byte_no, Register method, Register cache); + -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ void pop_ptr(Register r = x10); ++ void pop_i(Register r = x10); ++ void pop_l(Register r = x10); ++ void pop_f(FloatRegister r = f10); ++ void pop_d(FloatRegister r = f10); ++ void push_ptr(Register r = x10); ++ void push_i(Register r = x10); ++ void push_l(Register r = x10); ++ void push_f(FloatRegister r = f10); ++ void push_d(FloatRegister r = f10); + -+ if (!AvoidUnalignedAccesses) { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. 
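One way to picture the unaligned-access shortcut used above: once at least eight bytes are known to exist, the remainder can be handled by re-reading the eight bytes that end exactly at the last element, even though they may overlap bytes already compared; for length == 4 chars that re-reads the same word, which is still cheaper than another branch. A hedged C++ sketch of the idea; bytes_equal_8wise is illustrative, not the HotSpot routine:

    #include <cstdint>
    #include <cstring>

    // Compare 8 bytes at a time; for a 1..7 byte remainder, reload the final
    // 8 bytes ending at the last byte (possibly overlapping checked bytes).
    // Assumes len >= 8, as the surrounding code guarantees on this path.
    static bool bytes_equal_8wise(const uint8_t* a, const uint8_t* b, size_t len) {
      uint64_t x, y;
      size_t i = 0;
      for (; i + 8 <= len; i += 8) {
        std::memcpy(&x, a + i, 8);
        std::memcpy(&y, b + i, 8);
        if (x != y) return false;
      }
      if (i < len) {
        std::memcpy(&x, a + len - 8, 8);
        std::memcpy(&y, b + len - 8, 8);
        if (x != y) return false;
      }
      return true;
    }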
-+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ } ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos + -+ bind(SHORT); -+ ld(tmp1, Address(a1)); -+ ld(tmp2, Address(a2)); -+ xorr(tmp1, tmp1, tmp2); -+ neg(cnt1, cnt1); -+ slli(cnt1, cnt1, LogBitsPerByte); -+ sll(tmp1, tmp1, cnt1); -+ bnez(tmp1, DONE); ++ void empty_expression_stack() { ++ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ } + -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); + -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); -+} ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. ++ void load_float(Address src); ++ void load_double(Address src); + -+typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp); ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via t0 ++ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); ++ // dispatch normal table via t0 (assume t0 is loaded already) ++ void dispatch_only_normal(TosState state, Register Rs = t0); ++ void dispatch_only_noverify(TosState state, Register Rs = t0); ++ // load t0 from [xbcp + step] and dispatch via t0 ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load t0 from [xbcp] and dispatch via t0 and table ++ void dispatch_via (TosState state, address* table); + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method); + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. 
++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ // FIXME: Give us a valid frame at a null check. ++ virtual void null_check(Register reg, int offset = -1) { ++ MacroAssembler::null_check(reg, offset); ++ } + -+ BLOCK_COMMENT("string_compare {"); ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where); + -+ // A very short string -+ mv(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // check if str1 and str2 are same string -+ beq(str1, str2, DONE); -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if(!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, 
cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); + -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); + -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes when remaining bytes is -+ // less than 4(UL/LU) or 8 (LL/UU). -+ // Invalid bytes should be removed before comparison. -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ } else if (isLU) { // LU -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes -+ } -+ // remove invalid bytes -+ slli(t0, cnt2, LogBitsPerByte); -+ sll(tmp1, tmp1, t0); -+ sll(tmp2, tmp2, t0); -+ } else { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); ++ // narrow int return value ++ void narrow(Register result); + -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register t1, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register temp); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register temp); + -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); ++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); + -+ bind(SHORT_STRING); -+ // Is the minimum length zero? 
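The DIFFERENCE block above xors the two mismatching longwords, finds the lowest differing position, shifts both words down to it and subtracts one masked element from the other. A C++ sketch of the same computation, assuming, as the code appears to, that ctzc_bit rounds the bit position down to an element boundary; first_diff is an illustrative name and __builtin_ctzll is the GCC/Clang builtin:

    #include <cstdint>

    // Return the signed difference of the lowest differing element of two
    // 8-byte chunks (8-bit elements for Latin-1, 16-bit for UTF-16).
    static int first_diff(uint64_t w1, uint64_t w2, bool latin1) {
      uint64_t x = w1 ^ w2;                  // callers ensure the words differ
      int bit = __builtin_ctzll(x);          // first differing bit
      int elem_bits = latin1 ? 8 : 16;
      bit -= bit % elem_bits;                // align to the element start
      uint64_t mask = latin1 ? 0xFFu : 0xFFFFu;
      return (int)((w1 >> bit) & mask) - (int)((w2 >> bit) & mask);
    }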
-+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); ++ // Debugging ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); + -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + -+ bind(DONE); ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); + -+ BLOCK_COMMENT("} string_compare"); -+} ++ virtual void _call_Unimplemented(address call_site) { ++ save_bcp(); ++ set_last_Java_frame(esp, fp, (address) pc(), t0); ++ MacroAssembler::_call_Unimplemented(call_site); ++ } + -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++#ifdef ASSERT ++ void verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit = true); ++ void verify_frame_setup(); ++#endif ++}; + -+ BLOCK_COMMENT("string_indexof_char_short {"); ++#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +new file mode 100644 +index 00000000000..d93530d8564 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -0,0 +1,295 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" + -+ mv(result, -1); -+ mv(index, zr); ++#define __ _masm-> + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++// Implementation of SignatureHandlerGenerator ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ return g_INTArgReg[++_num_reg_int_args]; ++ } ++ return noreg; ++} + -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; ++ } ++} + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 
1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { ++ int ret = _stack_offset; ++ _stack_offset += wordSize; ++ return ret; ++} + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); // allocate on resourse area by default ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ _stack_offset = 0; ++} + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ lw(reg, src); ++ } else { ++ __ lw(x10, src); ++ __ sw(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ ld(reg, src); ++ } else { ++ __ ld(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+ bind(MATCH7); -+ addi(index, index, 7); ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ flw(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_int(); ++ } ++} + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ fld(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_long(); ++ } +} + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Register reg = next_gpr(); ++ if (reg == c_rarg1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (reg != noreg) { ++ // c_rarg2-c_rarg7 ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
++ __ ld(temp(), x10); ++ Label L; ++ __ beqz(temp(), L); ++ __ mv(reg, x10); ++ __ bind(L); ++ } else { ++ //to stack ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ ld(temp(), x10); ++ Label L; ++ __ bnez(temp(), L); ++ __ mv(x10, zr); ++ __ bind(L); ++ assert(sizeof(jobject) == wordSize, ""); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); + -+ addi(t0, cnt1, isL ? -32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++ // return result handler ++ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ __ ret(); + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++ __ flush(); ++} + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++// Implementation of SignatureHandlerLibrary + -+ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001); -+ mv(mask2, isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_reg_int_args; ++ unsigned int _num_reg_fp_args; ++ ++ intptr_t* single_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ return from_addr; + } + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++ intptr_t* double_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ return from_addr; ++ } + -+ bind(NOMATCH); -+ mv(result, -1); ++ int pass_gpr(intptr_t value) { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ *_int_args++ = value; ++ return _num_reg_int_args++; ++ } ++ return -1; ++ } + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} ++ int pass_fpr(intptr_t value) { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ *_fp_args++ = value; ++ return _num_reg_fp_args++; ++ } ++ return 
-1; ++ } + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ void pass_stack(intptr_t value) { ++ *_to++ = value; ++ } + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ virtual void pass_int() { ++ jint value = *(jint*)single_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; ++ virtual void pass_long() { ++ intptr_t value = *double_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ virtual void pass_object() { ++ intptr_t* addr = single_slot_addr(); ++ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; ++ virtual void pass_float() { ++ jint value = *(jint*) single_slot_addr(); ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ BLOCK_COMMENT("string_indexof {"); ++ virtual void pass_double() { ++ intptr_t value = *double_slot_addr(); ++ int arg = pass_fpr(value); ++ if (0 <= arg) { ++ *_fp_identifiers |= (1ull << arg); // mark as double ++ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack ++ pass_stack(value); ++ } ++ } + -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; + -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ _int_args = to - (method->is_static() ? 16 : 17); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ } + -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. 
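The _fp_identifiers word above is a small per-call bitmap: pass_fpr hands back the index of the floating-point argument register it used, and pass_double sets that bit so whatever consumes the argument block can tell doubles from floats; floats leave the bit clear. A minimal sketch of the bookkeeping; FpArgMask is an illustrative name:

    #include <cstdint>

    // One bit per FP argument register: set when the slot carries a double.
    struct FpArgMask {
      uint64_t bits = 0;
      void mark_double(int fp_reg_index)      { bits |= (uint64_t)1 << fp_reg_index; }
      bool is_double(int fp_reg_index) const  { return (bits >> fp_reg_index) & 1; }
    };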
++ ~SlowSignatureHandler() ++ { ++ _from = NULL; ++ _to = NULL; ++ _int_args = NULL; ++ _fp_args = NULL; ++ _fp_identifiers = NULL; ++ } ++}; + -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); + -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(current, (Method*)method); ++ assert(m->is_native(), "sanity check"); + -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ // handle arguments ++ SlowSignatureHandler ssh(m, (address)from, to); ++ ssh.iterate(UCONST64(-1)); + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++JRT_END +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +new file mode 100644 +index 00000000000..05df63ba2ae +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 7 : 3; ++#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP ++#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++// This is included in the middle of class Interpreter. ++// Do not include files here. + -+ sub(sp, sp, ASIZE); ++// native method calls + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_reg_fp_args; ++ unsigned int _num_reg_int_args; ++ int _stack_offset; + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); ++ Register next_gpr(); ++ FloatRegister next_fpr(); ++ int next_stack_offset(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ virtual ~SignatureHandlerGenerator() { ++ _masm = NULL; + } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++ // Code generation ++ void generate(uint64_t fingerprint); + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef 
PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +new file mode 100644 +index 00000000000..9a6084afa1d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -0,0 +1,86 @@ ++/* ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } ++#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } ++private: + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ assert(src != NULL, "Src should not be NULL."); ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); + } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; + } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); ++ const address last_Java_pc(void) { return _last_Java_pc; } + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != 
NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} -+ -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; ++private: + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++public: + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + -+ Register first = tmp3; ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +new file mode 100644 +index 00000000000..814ed23e471 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -0,0 +1,214 @@ ++/* ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++#define __ masm-> + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++#define BUFFER_SIZE 30*wordSize + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++// Common register usage: ++// x10/f10: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++static const Register robj = x13; ++static const Register rcounter = x14; ++static const Register roffset = x15; ++static const Register rcounter_addr = x16; ++static const Register result = x17; + -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ name = NULL; // unreachable + } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, 
haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ Label slow; ++ int32_t offset = 0; ++ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); ++ __ addi(rcounter_addr, rcounter_addr, offset); + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ Address safepoint_counter_addr(rcounter_addr, 0); ++ __ lwu(rcounter, safepoint_counter_addr); ++ // An even value means there are no ongoing safepoint operations ++ __ andi(t0, rcounter, 1); ++ __ bnez(t0, slow); + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ if (JvmtiExport::can_post_field_access()) { ++ // Using barrier to order wrt. JVMTI check and load of result. ++ __ membar(MacroAssembler::LoadLoad); + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ int32_t offset2; ++ __ la_patchable(result, ++ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), ++ offset2); ++ __ lwu(result, Address(result, offset2)); ++ __ bnez(result, slow); ++ ++ __ mv(robj, c_rarg1); ++ } else { ++ // Using address dependency to order wrt. load of result. ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); -+ -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); -+ } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); -+ -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); -+ } ++ __ srli(roffset, c_rarg2, 2); // offset + -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler ++ __ add(roffset, robj, roffset); + -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); ++ switch (type) { ++ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; ++ case T_BYTE: __ lb(result, Address(roffset, 0)); break; ++ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; ++ case T_SHORT: __ lh(result, Address(roffset, 0)); break; ++ case T_INT: __ lw(result, Address(roffset, 0)); break; ++ case T_LONG: __ ld(result, Address(roffset, 0)); break; ++ case T_FLOAT: { ++ __ flw(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_w(result, f28); // f{31--0}-->x ++ break; + } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); ++ case T_DOUBLE: { ++ __ fld(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_d(result, f28); // d{63--0}-->x ++ break; ++ } ++ default: ShouldNotReachHere(); + } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ // Using acquire: Order JVMTI check and load of result wrt. succeeding check ++ // (LoadStore for volatile field). ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); ++ __ lw(t0, safepoint_counter_addr); ++ __ bne(rcounter, t0, slow); + -+ bind(DONE); -+} ++ switch (type) { ++ case T_FLOAT: __ fmv_w_x(f10, result); break; ++ case T_DOUBLE: __ fmv_d_x(f10, result); break; ++ default: __ mv(x10, result); break; ++ } ++ __ ret(); + -+void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind(slow); ++ address slow_case_addr; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ slow_case_addr = NULL; // unreachable ++ } + -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts ++ { ++ __ enter(); ++ int32_t tmp_offset = 0; ++ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); ++ __ jalr(x1, t0, tmp_offset); ++ __ leave(); ++ __ ret(); + } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); ++ __ flush(); + -+ mv(result, true); ++ return fast_entry; +} + -+void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); + -+ mv(result, false); ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); -+ } ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} + -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} + -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); +} + -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. -+void MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} + -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} + -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); +} + -+void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +new file mode 100644 +index 00000000000..83ffcc55d83 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ BLOCK_COMMENT("arrays_equals_v {"); ++#ifndef CPU_RISCV_JNITYPES_RISCV_HPP ++#define CPU_RISCV_JNITYPES_RISCV_HPP + -+ // if (a1 == a2), return true -+ mv(result, true); -+ oop_equal(a1, a2, DONE); ++#include "jni.h" ++#include "memory/allStatic.hpp" ++#include "oops/oop.hpp" + -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call + -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); ++class JNITypes : private AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). + -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++public: ++ // Ints are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + -+ bind(DONE); ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to+1). 
++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ } + -+ BLOCK_COMMENT("} arrays_equals_v"); -+} ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ pos += 2; ++ } + -+void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ pos += 2; ++ } + -+ bool str1_isL = encLL || encLU; -+ bool str2_isL = encLL || encUL; ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } ++ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + -+ BLOCK_COMMENT("string_compare {"); ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 1 ++ // Doubles are stored in native word format in one JavaCallArgument ++ // slot at *(to+1). ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ } + -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ pos += 2; ++ } + -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ pos += 2; ++ } + -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. 
++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; + -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); -+ } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); ++#endif // CPU_RISCV_JNITYPES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +new file mode 100644 +index 00000000000..86710295444 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -0,0 +1,4016 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ bind(DONE); -+} ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/powerOfTwo.hpp" ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#endif + -+address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+ postcond(pc() != badAddress); -+ return pc(); ++static void pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg0, arg); ++ } +} + -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg1, arg); ++ } +} + -+// result: the number of elements had been encoded. 
-+void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg2, arg); ++ } ++} + -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); ++static void pass_arg3(MacroAssembler* masm, Register arg) { ++ if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg3, arg); ++ } ++} + -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++} + -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++} + -+ bind(DIFFERENCE); -+ add(result, result, tmp); ++// Implementation of call_VM versions + -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + -+address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { -+ Label loop, DONE; ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} + -+ mv(result, true); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} + -+ bind(loop); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ // if element highest bit is set, return true -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, DONE); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); + -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, loop); -+ mv(result, false); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); + -+ bind(DONE); -+ postcond(pc() != badAddress); -+ return pc(); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zero, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ srl(match_mask, match_mask, trailing_zero); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zero, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns would be like: -+// - 0x8080808080808080 (Latin1) -+// - 0x8000800080008000 (UTF16) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++// these are no-ops overridden by InterpreterMacroAssembler ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} ++void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// 
offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2) { -+ Label L_loop_1, L_loop_2, L_end, L_not_zero; -+ bnez(len, L_not_zero); -+ mv(out, zr); -+ j(L_end); -+ bind(L_not_zero); -+ zero_extend(k, k, 32); -+ shadd(offset, offset, out, t0, LogBytesPerInt); -+ shadd(in, len, in, t0, LogBytesPerInt); -+ mv(out, zr); ++// Calls to C land ++// ++// When entering C land, the fp, & esp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register tmp) { + -+ if (AvoidUnalignedAccesses) { -+ // if in and offset are both 8 bytes aligned. -+ orr(t0, in, offset); -+ andi(t0, t0, 0x7); -+ beqz(t0, L_loop_2); -+ } else { -+ j(L_loop_2); ++ if (last_java_pc->is_valid()) { ++ sd(last_java_pc, Address(xthread, ++ JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); + } + -+ bind(L_loop_1); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ sub(len, len, 1); -+ beqz(len, L_end); -+ j(L_loop_1); -+ -+ -+ bind(L_loop_2); -+ Label L_one; -+ sub(len, len, 1); -+ bltz(len, L_end); -+ sub(len, len, 1); -+ bltz(len, L_one); -+ -+ sub(in, in, 8); -+ ld(tmp1, Address(in, 0)); -+ ror_imm(tmp1, tmp1, 32); // convert to little-endian -+ -+ const Register carry = out; -+ const Register src1_hi = t0; -+ const Register src1_lo = tmp2; -+ const Register src2 = t1; -+ -+ mulhu(src1_hi, k, tmp1); -+ mul(src1_lo, k, tmp1); -+ sub(offset, offset, 8); -+ ld(src2, Address(offset, 0)); -+ ror_imm(src2, src2, 32, tmp1); -+ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1); -+ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian -+ sd(src1_lo, Address(offset, 0)); -+ j(L_loop_2); -+ -+ bind(L_one); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ -+ bind(L_end); -+} -+ -+/** -+ * Multiply 32 bit by 32 bit first loop. 
-+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // long carry = 0; -+ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) { -+ // long product = (y[j] & LONG_MASK) * -+ // (x[xstart] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ // determine last_java_sp register ++ if (last_java_sp == sp) { ++ mv(tmp, sp); ++ last_java_sp = tmp; ++ } else if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+ Label L_first_loop, L_first_loop_exit; ++ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++} + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register tmp) { ++ assert(last_java_pc != NULL, "must provide a valid PC"); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ sub(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ j(L_first_loop); ++ la(tmp, last_java_pc); ++ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + -+ bind(L_first_loop_exit); ++ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); +} + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register tmp) { ++ if (L.is_bound()) { ++ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); ++ } ++} + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ // we must set sp to zero to clear frame ++ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); ++} + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ sub(idx, idx, 1); -+ bltz(idx, L_one_y); ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = xthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(java_thread == xthread, "unexpected register"); + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + -+ sub(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ // push java thread (becomes first argument of C function) ++ mv(c_rarg0, java_thread); + -+ j(L_first_loop); ++ // set last Java frame before call ++ assert(last_java_sp != fp, "can't use fp"); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ Label l; ++ set_last_Java_frame(last_java_sp, fp, l, t0); + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ // do the call, remove parameters ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); + -+ 
bind(L_first_loop_exit); -+} ++ // reset last Java frame ++ // Only interpreter should have to clear fp ++ reset_last_Java_frame(true); + -+/** -+ * Multiply 128 bit by 128. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) -+{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // -+ -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; -+ -+ srli(jdx, idx, 2); -+ -+ bind(L_third_loop); ++ // C++ interp handles this in the interpreter ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); + -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ sub(idx, idx, 4); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); ++ Label ok; ++ beqz(t0, ok); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); ++ jalr(x0, t0, offset); ++ bind(ok); ++ } + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); ++ } ++} + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); ++} + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, xthread, tmp); + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ Label L_fallthrough, L_tmp; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ // Fast path check: class is fully initialized ++ lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); ++ sub(tmp, tmp, 
InstanceKlass::fully_initialized); ++ beqz(tmp, *L_fast_path); + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ // Fast path check: current thread is initializer thread ++ ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++ if (L_slow_path == &L_fallthrough) { ++ beq(xthread, tmp, *L_fast_path); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(xthread, tmp, *L_slow_path); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} + -+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) { return; } + -+ j(L_third_loop); ++ // Pass register number to verify_oop_subroutine ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop {"); + -+ bind(L_third_loop_exit); ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++ mv(c_rarg0, reg); // c_rarg0 : x10 ++ li(t0, (uintptr_t)(address)b); + -+ Label L_check_1; -+ sub(idx, idx, 2); -+ bltz(idx, L_check_1); ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ BLOCK_COMMENT("} verify_oop"); ++} + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ return; ++ } + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop_addr {"); + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ bind(L_check_1); ++ if (addr.uses(sp)) { ++ la(x10, addr); ++ ld(x10, Address(x10, 4 * wordSize)); ++ } else { ++ ld(x10, addr); ++ } + -+ andi(idx, idx, 0x1); -+ sub(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++ li(t0, (uintptr_t)(address)b); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry); ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ 
shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++ BLOCK_COMMENT("} verify_oop_addr"); ++} + -+ bind(L_post_third_loop_done); ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ if (arg_slot.is_constant()) { ++ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); ++ } else { ++ assert_different_registers(t0, arg_slot.as_register()); ++ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); ++ return Address(t0, offset); ++ } +} + -+/** -+ * Code for BigInteger::multiplyToLen() instrinsic. -+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) -+{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif + -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ // In order to get locks to work, we need to fake a in_VM state ++ if (ShowMessageBoxOnError) { ++ JavaThread* thread = JavaThread::current(); ++ JavaThreadState saved_state = thread->thread_state(); ++ thread->set_thread_state(_thread_in_vm); ++#ifndef PRODUCT ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ ttyLocker ttyl; ++ BytecodeCounter::print(); ++ } ++#endif ++ if (os::message_box(msg, "Execution stopped, print registers?")) { ++ ttyLocker ttyl; ++ tty->print_cr(" pc = 0x%016lx", pc); ++#ifndef PRODUCT ++ tty->cr(); ++ findpc(pc); ++ tty->cr(); ++#endif ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ 
tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); ++ BREAKPOINT; ++ } ++ } ++ fatal("DEBUG MESSAGE: %s", msg); ++} + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { ++ Label done, not_weak; ++ beqz(value, done); // Use NULL as-is. + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++ // Test for jweak tag. ++ andi(t0, value, JNIHandles::weak_tag_mask); ++ beqz(t0, not_weak); + -+ Label L_multiply_64_or_128, L_done; ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, ++ Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ j(done); + -+ sub(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} + -+ const Register jdx = tmp1; ++void MacroAssembler::stop(const char* msg) { ++ address ip = pc(); ++ pusha(); ++ li(c_rarg0, (uintptr_t)(address)msg); ++ li(c_rarg1, (uintptr_t)(address)ip); ++ mv(c_rarg2, sp); ++ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); ++ jalr(c_rarg3); ++ ebreak(); ++} + -+ if (AvoidUnalignedAccesses) { -+ // if x and y are both 8 bytes aligend. -+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_or_128); -+ } else { -+ j(L_multiply_64_or_128); ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); + } ++ stop(buf); ++} + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::emit_static_call_stub() { ++ // CompiledDirectStaticCall::set_to_interpreted knows the ++ // exact layout of this stub. + -+ Label L_second_loop_1; -+ bind(L_second_loop_1); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ ifence(); ++ mov_metadata(xmethod, (Metadata*)NULL); + -+ bind(L_third_loop); -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); ++ // Jump to the entry point of the i2c stub. 
++ int32_t offset = 0; ++ movptr_with_offset(t0, 0, offset); ++ jalr(x0, t0, offset); ++} + -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ j(L_third_loop); ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr) { ++ call_native_base(entry_point, retaddr); ++} + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::call_native(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_native_base(entry_point); ++} + -+ j(L_second_loop_1); ++void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { ++ Label E, L; ++ int32_t offset = 0; ++ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp ++ movptr_with_offset(t0, entry_point, offset); ++ jalr(x1, t0, offset); ++ if (retaddr != NULL) { ++ bind(*retaddr); ++ } ++ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp ++} + -+ bind(L_multiply_64_or_128); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ Label L_second_loop_2; -+ beqz(kdx, L_second_loop_2); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf_base(entry_point, 1); ++} + -+ Label L_carry; -+ sub(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ call_VM_leaf_base(entry_point, 2); ++} + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ sub(kdx, kdx, 1); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ pass_arg2(this, arg_2); ++ call_VM_leaf_base(entry_point, 3); ++} + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} + -+ // Second and third (nested) loops. 
-+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + -+ bind(L_second_loop_2); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} + -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ assert(arg_0 != c_rarg3, "smashed arg"); ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 4); ++} + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++void MacroAssembler::nop() { ++ addi(x0, x0, 0); ++} + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++void MacroAssembler::mv(Register Rd, Register Rs) { ++ if (Rd != Rs) { ++ addi(Rd, Rs, 0); ++ } ++} + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++void MacroAssembler::notr(Register Rd, Register Rs) { ++ xori(Rd, Rs, -1); ++} + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++void MacroAssembler::neg(Register Rd, Register Rs) { ++ sub(Rd, x0, Rs); ++} + -+ addi(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::negw(Register Rd, Register Rs) { ++ subw(Rd, x0, Rs); ++} + -+ sub(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); ++void MacroAssembler::sext_w(Register Rd, Register Rs) { ++ addiw(Rd, Rs, 0); ++} + -+ // z[i] = (int) carry; -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_2); ++void MacroAssembler::zext_b(Register Rd, Register Rs) { ++ andi(Rd, Rs, 0xFF); ++} + -+ // Next infrequent code 
is moved outside loops. -+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++void MacroAssembler::seqz(Register Rd, Register Rs) { ++ sltiu(Rd, Rs, 1); ++} + -+ bind(L_done); ++void MacroAssembler::snez(Register Rd, Register Rs) { ++ sltu(Rd, x0, Rs); +} -+#endif // COMPILER2 + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseZbb) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 8 : 16; -+ mv(Rd, -step); -+ mv(tmp2, Rs); ++void MacroAssembler::sltz(Register Rd, Register Rs) { ++ slt(Rd, Rs, x0); ++} + -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); ++void MacroAssembler::sgtz(Register Rd, Register Rs) { ++ slt(Rd, x0, Rs); +} + -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ mv(tmp1, 0xFF000000); // first byte mask at lower word -+ andr(Rd, Rs, tmp1); -+ for (int i = 0; i < 2; i++) { -+ slli(Rd, Rd, wordSize); -+ srli(tmp1, tmp1, wordSize); -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); ++void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_s(Rd, Rs, Rs); + } -+ slli(Rd, Rd, wordSize); -+ andi(tmp2, Rs, 0xFF); // last byte mask at lower word -+ orr(Rd, Rd, tmp2); +} + -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ srli(Rs, Rs, 32); // only upper 32 bits are needed -+ inflate_lo32(Rd, Rs, tmp1, tmp2); ++void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_s(Rd, Rs, Rs); +} + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; -+ -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. It is expanded many times -+// in compiled code, so it is important to keep it short. -+ -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. 
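Editor's aside, not part of the patch: the one-instruction aliases added above (notr, neg, seqz, snez, zext_b, and the float fneg/fabs forms) are conventional RISC-V idioms. A minimal host-side C++ check of the integer identities they rely on, included only as an illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = -12345;
      assert((x ^ -1) == ~x);                            // notr:   xori  rd, rs, -1
      assert((int64_t)0 - x == -x);                      // neg:    sub   rd, x0, rs
      assert(((uint64_t)x < 1u) == (x == 0));            // seqz:   sltiu rd, rs, 1
      assert(((uint64_t)0 < (uint64_t)x) == (x != 0));   // snez:   sltu  rd, x0, rs
      assert((x & 0xFF) == (int64_t)(uint8_t)x);         // zext_b: andi  rd, rs, 0xFF
      return 0;
    }

The float variants use the same idea through sign injection: fsgnjn/fsgnjx with both source operands equal yield negation and absolute value without touching exponent or mantissa.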
-+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); ++void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_s(Rd, Rs, Rs); ++} + -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); -+ { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels1(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); -+ } -+ { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); ++void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_d(Rd, Rs, Rs); + } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); +} + -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. -+#define SmallArraySize (18 * BytesPerLong) -+void MacroAssembler::zero_words(Register base, uint64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); ++void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_d(Rd, Rs, Rs); ++} + -+ BLOCK_COMMENT("zero_words {"); ++void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_d(Rd, Rs, Rs); ++} + -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } ++void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { ++ vmnand_mm(vd, vs, vs); ++} ++ ++void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { ++ vnsrl_wx(vd, vs, x0, vm); ++} ++ ++void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { ++ vfsgnjn_vv(vd, vs, vs); ++} ++ ++void MacroAssembler::la(Register Rd, const address &dest) { ++ int64_t offset = dest - pc(); ++ if (is_offset_in_range(offset, 32)) { ++ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit ++ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); + } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } ++ movptr(Rd, dest); ++ } ++} + -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ mv(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); ++void MacroAssembler::la(Register Rd, const Address &adr) { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); ++ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++ ++ switch (adr.getMode()) { ++ case Address::literal: { ++ if (rtype == relocInfo::none) { ++ li(Rd, (intptr_t)(adr.target())); ++ } else { ++ movptr(Rd, adr.target()); ++ } ++ break; + 
} -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); + } -+ BLOCK_COMMENT("} zero_words"); +} + -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) -+{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... -+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end ++void MacroAssembler::la(Register Rd, Label &label) { ++ la(Rd, target(label)); ++} + -+ assert_different_registers(base, cnt, value, t0, t1); ++#define INSN(NAME) \ ++ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ ++ NAME(Rs, zr, dest); \ ++ } \ ++ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ ++ NAME(Rs, zr, l, is_far); \ ++ } \ + -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll ++ INSN(beq); ++ INSN(bne); ++ INSN(blt); ++ INSN(ble); ++ INSN(bge); ++ INSN(bgt); + -+ beqz(cnt, fini); ++#undef INSN + -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. -+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); ++// Float compare branch instructions + -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); ++#define INSN(NAME, FLOATCMP, BRANCH) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_s(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_d(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ + } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); + -+ bind(fini); -+} ++ INSN(beq, feq, bnez); ++ INSN(bne, feq, beqz); + -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ -+} ++#undef INSN + -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) + -+#undef FCVT_SAFE ++#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_s(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_s(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, 
Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_d(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_d(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } + -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ -+} ++ INSN(ble, fle, flt); ++ INSN(blt, flt, fle); + -+FCMP(float, s); -+FCMP(double, d); ++#undef INSN + -+#undef FCMP ++#define INSN(NAME, CMP) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } + -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { -+ assert_different_registers(addr, len, tmp1, t0, t1); ++ INSN(bgt, blt); ++ INSN(bge, ble); + -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); -+ } -+#endif // ASSERT ++#undef INSN + -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT + -+ Label loop; -+ Label entry; ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd) { \ ++ csrr(Rd, CSR); \ ++ } + -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... 
-+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } ++ INSN(rdinstret, CSR_INSTERT); ++ INSN(rdcycle, CSR_CYCLE); ++ INSN(rdtime, CSR_TIME); ++ INSN(frcsr, CSR_FCSR); ++ INSN(frrm, CSR_FRM); ++ INSN(frflags, CSR_FFLAGS); + -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll ++#undef INSN + -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp1 always points to the end of the region we're about to zero -+ shadd(tmp1, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp1, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp1, tmp1, unroll * wordSize); -+ bnez(len, loop); ++void MacroAssembler::csrr(Register Rd, unsigned csr) { ++ csrrs(Rd, csr, x0); +} + -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseZba) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; -+ } ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ ++ OPFUN(x0, csr, Rs); \ + } + -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); -+ } else { -+ add(Rd, Rs1, Rs2); -+ } -+} ++ INSN(csrw, csrrw); ++ INSN(csrs, csrrs); ++ INSN(csrc, csrrc); + -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseZba && bits == 32) { -+ zext_w(dst, src); -+ return; -+ } ++#undef INSN + -+ if (UseZbb && bits == 16) { -+ zext_h(dst, src); -+ return; ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ ++ OPFUN(x0, csr, imm); \ + } + -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} ++ INSN(csrwi, csrrwi); ++ INSN(csrsi, csrrsi); ++ INSN(csrci, csrrci); + -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseZbb) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; -+ } -+ } ++#undef INSN + -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd, Register Rs) { \ ++ csrrw(Rd, CSR, Rs); \ + } -+} + -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; ++ INSN(fscsr, CSR_FCSR); ++ INSN(fsrm, CSR_FRM); ++ INSN(fsflags, CSR_FFLAGS); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(Register Rs) { \ ++ NAME(x0, Rs); \ + } + -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} ++ INSN(fscsr); ++ INSN(fsrm); ++ INSN(fsflags); + -+void MacroAssembler::load_constant_pool_cache(Register cpool, 
Register method) -+{ -+ ld(cpool, Address(method, Method::const_offset())); -+ ld(cpool, Address(cpool, ConstMethod::constants_offset())); -+ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes())); ++#undef INSN ++ ++void MacroAssembler::fsrmi(Register Rd, unsigned imm) { ++ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); ++ csrrwi(Rd, CSR_FRM, imm); +} + -+void MacroAssembler::load_max_stack(Register dst, Register method) -+{ -+ ld(dst, Address(xmethod, Method::const_offset())); -+ lhu(dst, Address(dst, ConstMethod::max_stack_offset())); ++void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { ++ csrrwi(Rd, CSR_FFLAGS, imm); +} + -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. -+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(unsigned imm) { \ ++ NAME(x0, imm); \ ++ } ++ ++ INSN(fsrmi); ++ INSN(fsflagsi); ++ ++#undef INSN ++ ++void MacroAssembler::push_reg(Register Rs) ++{ ++ addi(esp, esp, 0 - wordSize); ++ sd(Rs, Address(esp, 0)); +} + -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++void MacroAssembler::pop_reg(Register Rd) ++{ ++ ld(Rd, esp, 0); ++ addi(esp, esp, wordSize); +} + -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { ++ int count = 0; ++ // Scan bitset to accumulate register pairs ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; + } ++ bitset <<= 1; + } ++ return count; +} + -+// An oop arg. Must pass a handle not the oop itself -+void MacroAssembler::object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle ++// Push lots of registers in the bit set supplied. Don't push sp. 
++// Return the number of words pushed ++int MacroAssembler::push_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + -+ if (src.first()->is_stack()) { -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ bnez(t0, notZero1); -+ mv(rHandle, zr); -+ bind(notZero1); -+ } else { ++ if (count) { ++ addi(stack, stack, - count * wordSize - offset); ++ } ++ for (int i = count - 1; i >= 0; i--) { ++ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_pushed ++;) ++ } + -+ // Oop is in a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ assert(words_pushed == count, "oops, pushed != count"); + -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } ++ return count; ++} + -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; ++int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ beqz(rOop, isZero); -+ la(rHandle, Address(sp, offset)); -+ bind(isZero); -+ } else { -+ Label notZero2; -+ la(rHandle, Address(sp, offset)); -+ bnez(rOop, notZero2); -+ mv(rHandle, zr); -+ bind(notZero2); -+ } ++ for (int i = count - 1; i >= 0; i--) { ++ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_popped ++;) + } + -+ // If arg is on the stack then place it otherwise it is already in correct reg. 
-+ if (dst.first()->is_stack()) { -+ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ if (count) { ++ addi(stack, stack, count * wordSize + offset); + } ++ assert(words_popped == count, "oops, popped != count"); ++ ++ return count; +} + -+// A float arg may have to do float reg int reg conversion -+void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ lwu(tmp, Address(fp, reg2offset_in(src.first()))); -+ sw(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. ++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); ++ ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); + } -+} + -+// A long move -+void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; + } ++ ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; +} + -+// A double move -+void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + 
(count & 1); ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; + } -+} + -+void MacroAssembler::rt_call(address dest, Register tmp) { -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ la_patchable(tmp, RuntimeAddress(dest), offset); -+ jalr(x1, tmp, offset); ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); + } -+} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -new file mode 100644 -index 000000000..a4d5ce0e0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,975 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; ++} + -+#include "asm/assembler.inline.hpp" -+#include "code/vmreg.hpp" -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. 
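Editor's aside, not part of the patch: a small host-side sketch of how the bitset_to_regs()/push_reg() pair above walks a 32-bit register mask and sizes the spill area. The mask below is only an example, and wordSize is assumed to be 8 as on riscv64:

    #include <cstdio>

    // Same scan as bitset_to_regs() above: test bit 31 and shift left, so set
    // bits are reported from the highest-numbered register downwards.
    static int bitset_to_regs(unsigned int bitset, unsigned char* regs) {
      int count = 0;
      for (int reg = 31; reg >= 0; reg--) {
        if ((1U << 31) & bitset) {
          regs[count++] = reg;
        }
        bitset <<= 1;
      }
      return count;
    }

    int main() {
      unsigned char regs[32];
      unsigned int mask = (1u << 7) | (0xffu << 10) | (0xfu << 28);  // example set: x7, x10-x17, x28-x31
      int count = bitset_to_regs(mask, regs);
      // push_reg() pads an odd count by one extra word, keeping sp 16-byte aligned.
      int frame_bytes = (count + (count & 1)) * 8;
      printf("%d registers, %d bytes reserved, highest is x%d\n", count, frame_bytes, regs[0]);
      return 0;
    }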
++#ifdef COMPILER2 ++int MacroAssembler::push_vp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + -+class MacroAssembler: public Assembler { ++ // Scan bitset to accumulate register pairs ++ unsigned char regs[32]; ++ int count = 0; ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } + -+ public: -+ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ for (int i = 0; i < count; i++) { ++ sub(stack, stack, vector_size_in_bytes); ++ vs1r_v(as_VectorRegister(regs[i]), stack); + } -+ virtual ~MacroAssembler() {} + -+ void safepoint_poll(Label& slow_path); -+ void safepoint_poll_acquire(Label& slow_path); ++ return count * vector_size_in_bytes / wordSize; ++} + -+ // Alignment -+ void align(int modulus); ++int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + -+ // Stack frame creation/removal -+ // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happend asychronously -+ void enter() { -+ addi(sp, sp, - 2 * wordSize); -+ sd(ra, Address(sp, wordSize)); -+ sd(fp, Address(sp)); -+ addi(fp, sp, 2 * wordSize); ++ // Scan bitset to accumulate register pairs ++ unsigned char regs[32]; ++ int count = 0; ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; + } + -+ void leave() { -+ addi(sp, fp, - 2 * wordSize); -+ ld(fp, Address(sp)); -+ ld(ra, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); ++ for (int i = count - 1; i >= 0; i--) { ++ vl1r_v(as_VectorRegister(regs[i]), stack); ++ add(stack, stack, vector_size_in_bytes); + } + ++ return count * vector_size_in_bytes / wordSize; ++} ++#endif // COMPILER2 + -+ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) -+ // The pointer will be loaded into the thread register. -+ void get_thread(Register thread); -+ -+ // Support for VM calls -+ // -+ // It is imperative that all calls into the VM are handled via the call_VM macros. -+ // They make sure that the stack linkage is setup correctly. call_VM's correspond -+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + -+ void call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++ // Push float registers f0-f7, f10-f17, f28-f31. 
++ addi(sp, sp, - wordSize * 20); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++} + -+ // Overloadings with last_Java_sp -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments = 0, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++ addi(sp, sp, wordSize * 20); + -+ void get_vm_result(Register oop_result, Register java_thread); -+ void get_vm_result_2(Register metadata_result, Register java_thread); ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++} + -+ // These always tightly bind to MacroAssembler::call_VM_leaf_base -+ // bypassing the virtual implementation -+ void call_VM_leaf(address entry_point, -+ int number_of_arguments = 0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1, Register arg_2); ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} + -+ // These always tightly bind to MacroAssembler::call_VM_base -+ // bypassing the virtual implementation -+ void super_call_VM_leaf(address entry_point, Register arg_0); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); ++} + -+ // last Java Frame (fills frame anchor) -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); ++void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++ CompressibleRegion cr(this); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ push_reg(0xffffffe0, sp); + -+ // thread in the default location (xthread) -+ void reset_last_Java_frame(bool clear_fp); ++ // float registers ++ addi(sp, sp, - 32 * wordSize); ++ for (int i = 0; i < 32; i++) { ++ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label* retaddr = NULL -+ ); ++ // vector registers ++ if (save_vectors) { ++ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); ++ vsetvli(t0, x0, Assembler::e64, Assembler::m8); ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { ++ add(t0, sp, vector_size_in_bytes * i); ++ vse64_v(as_VectorRegister(i), t0); ++ } ++ } ++} + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label& retaddr) { -+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++ CompressibleRegion cr(this); ++ // vector registers ++ if (restore_vectors) { ++ vsetvli(t0, x0, Assembler::e64, Assembler::m8); ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { ++ vle64_v(as_VectorRegister(i), sp); ++ add(sp, sp, vector_size_in_bytes * 8); ++ } + } + -+ virtual void call_VM_base( // returns the register containing the thread upon return -+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise -+ Register java_thread, // the thread if computed before ; use noreg otherwise -+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call -+ bool check_exceptions // whether to check for pending exceptions after return -+ ); ++ // float registers ++ for (int i = 0; i < 32; i++) { ++ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ addi(sp, sp, 32 * wordSize); + -+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ pop_reg(0xffffffe0, sp); ++} + -+ virtual void check_and_handle_earlyret(Register java_thread); -+ virtual void check_and_handle_popframe(Register java_thread); ++static int patch_offset_in_jal(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] ++ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] ++ Assembler::patch(branch, 20, 20, 
(offset >> 11) & 0x1); // offset[11] ==> branch[20] ++ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] ++ return NativeInstruction::instruction_size; // only one instruction ++} + -+ void resolve_oop_handle(Register result, Register tmp = x15); -+ void resolve_jobject(Register value, Register thread, Register tmp); ++static int patch_offset_in_conditional_branch(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] ++ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] ++ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] ++ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] ++ return NativeInstruction::instruction_size; // only one instruction ++} + -+ void movoop(Register dst, jobject obj, bool immediate = false); -+ void mov_metadata(Register dst, Metadata* obj); -+ void bang_stack_size(Register size, Register tmp); -+ void set_narrow_oop(Register dst, jobject obj); -+ void set_narrow_klass(Register dst, Klass* k); ++static int patch_offset_in_pc_relative(address branch, int64_t offset) { ++ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load ++ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] ++ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++} + -+ void load_mirror(Register dst, Register method, Register tmp = x15); -+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, -+ Address src, Register tmp1, Register thread_tmp); -+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register tmp2, Register tmp3); -+ void load_klass(Register dst, Register src); -+ void store_klass(Register dst, Register src); -+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); ++static int patch_addr_in_movptr(address branch, address target) { ++ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load ++ int32_t lower = ((intptr_t)target << 36) >> 36; ++ int64_t upper = ((intptr_t)target - lower) >> 28; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. 
target[ 4: 0] ==> branch[31:20] ++ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ void encode_klass_not_null(Register r); -+ void decode_klass_not_null(Register r); -+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_heap_oop_not_null(Register r); -+ void decode_heap_oop_not_null(Register dst, Register src); -+ void decode_heap_oop(Register d, Register s); -+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } -+ void encode_heap_oop(Register d, Register s); -+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; -+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); ++static int patch_imm_in_li64(address branch, address target) { ++ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi ++ int64_t lower = (intptr_t)target & 0xffffffff; ++ lower = lower - ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ int64_t tmp_upper = upper, tmp_lower = upper; ++ tmp_lower = (tmp_lower << 52) >> 52; ++ tmp_upper -= tmp_lower; ++ tmp_upper >>= 12; ++ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), ++ // upper = target[63:32] + 1. ++ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. ++ // Load the rest 32 bits. ++ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. ++ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. ++ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. ++ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ void store_klass_gap(Register dst, Register src); ++static int patch_imm_in_li32(address branch, int32_t target) { ++ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw ++ int64_t upper = (intptr_t)target; ++ int32_t lower = (((int32_t)target) << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. ++ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ // currently unimplemented -+ // Used for storing NULL. All other oop constants should be -+ // stored using routines that take a jobject. 
-+ void store_heap_oop_null(Address dst); ++static long get_offset_of_jal(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ long offset = 0; ++ unsigned insn = *(unsigned*)insn_addr; ++ long val = (long)Assembler::sextract(insn, 31, 12); ++ offset |= ((val >> 19) & 0x1) << 20; ++ offset |= (val & 0xff) << 12; ++ offset |= ((val >> 8) & 0x1) << 11; ++ offset |= ((val >> 9) & 0x3ff) << 1; ++ offset = (offset << 43) >> 43; ++ return offset; ++} + -+ // This dummy is to prevent a call to store_heap_oop from -+ // converting a zero (linke NULL) into a Register by giving -+ // the compiler two choices it can't resolve ++static long get_offset_of_conditional_branch(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ unsigned insn = *(unsigned*)insn_addr; ++ offset = (long)Assembler::sextract(insn, 31, 31); ++ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); ++ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); ++ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); ++ offset = (offset << 41) >> 41; ++ return offset; ++} + -+ void store_heap_oop(Address dst, void* dummy); ++static long get_offset_of_pc_relative(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. ++ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. ++ offset = (offset << 32) >> 32; ++ return offset; ++} + -+ // Support for NULL-checks -+ // -+ // Generates code that causes a NULL OS exception if the content of reg is NULL. -+ // If the accessed location is M[reg + offset] and the offset is known, provide the -+ // offset. No explicit code generateion is needed if the offset is within a certain -+ // range (0 <= offset <= page_size). ++static address get_target_of_movptr(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. ++ return (address) target_address; ++} + -+ virtual void null_check(Register reg, int offset = -1); -+ static bool needs_explicit_null_check(intptr_t offset); ++static address get_target_of_li64(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. 
++ return (address)target_address; ++} + -+ // idiv variant which deals with MINLONG as dividend and -1 as divisor -+ int corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder); -+ int corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder); ++static address get_target_of_li32(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. ++ return (address)target_address; ++} + -+ // interface method calling -+ void lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& no_such_interface, -+ bool return_method = true); ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. ++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ assert_cond(branch != NULL); ++ int64_t offset = target - branch; ++ if (NativeInstruction::is_jal_at(branch)) { // jal ++ return patch_offset_in_jal(branch, offset); ++ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne ++ return patch_offset_in_conditional_branch(branch, offset); ++ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load ++ return patch_offset_in_pc_relative(branch, offset); ++ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr ++ return patch_addr_in_movptr(branch, target); ++ } else if (NativeInstruction::is_li64_at(branch)) { // li64 ++ return patch_imm_in_li64(branch, target); ++ } else if (NativeInstruction::is_li32_at(branch)) { // li32 ++ int64_t imm = (intptr_t)target; ++ return patch_imm_in_li32(branch, (int32_t)imm); ++ } else { ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif ++ ShouldNotReachHere(); ++ return -1; ++ } ++} + -+ // virtual method calling -+ // n.n. 
x86 allows RegisterOrConstant for vtable_index -+ void lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result); ++address MacroAssembler::target_addr_for_insn(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ if (NativeInstruction::is_jal_at(insn_addr)) { // jal ++ offset = get_offset_of_jal(insn_addr); ++ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne ++ offset = get_offset_of_conditional_branch(insn_addr); ++ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load ++ offset = get_offset_of_pc_relative(insn_addr); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr ++ return get_target_of_movptr(insn_addr); ++ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 ++ return get_target_of_li64(insn_addr); ++ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 ++ return get_target_of_li32(insn_addr); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return address(((uintptr_t)insn_addr + offset)); ++} + -+ // allocation -+ void eden_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); -+ void tlab_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails -+ bool is_far = false -+ ); ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ // OOPs are either narrow (32 bits) or wide (48 bits). We encode ++ // narrow OOPs by setting the upper 16 bits in the first ++ // instruction. ++ if (NativeInstruction::is_li32_at(insn_addr)) { ++ // Move narrow OOP ++ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); ++ return patch_imm_in_li32(insn_addr, (int32_t)n); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { ++ // Move wide OOP ++ return patch_addr_in_movptr(insn_addr, o); ++ } ++ ShouldNotReachHere(); ++ return -1; ++} + -+ // Test sub_klass against super_klass, with fast and slow paths. ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::is_fully_initialized()) { ++ mv(xheapbase, CompressedOops::ptrs_base()); ++ } else { ++ int32_t offset = 0; ++ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); ++ ld(xheapbase, Address(xheapbase, offset)); ++ } ++ } ++} + -+ // The fast path produces a tri-state answer: yes / no / maybe-slow. -+ // One of the three labels can be NULL, meaning take the fall-through. -+ // If super_check_offset is -1, the value is loaded up from super_klass. 
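// --- Editorial sketch (not part of the patch) --------------------------------
// target_addr_for_insn above recovers addresses from a movptr sequence, whose
// six instructions carry a 48-bit address as lui(20 bits) + addi(12 bits) +
// addi(11 bits) + addi(5 bits), exactly as patch_addr_in_movptr scatters it and
// get_target_of_movptr reassembles it. A self-contained, hypothetical check
// that the split and rejoin are lossless for addresses below 2^47 (illustrative
// names only, not HotSpot APIs):

#include <cassert>
#include <cstdint>

namespace movptr_sketch {
  inline int64_t sext(uint64_t v, unsigned bits) {
    const uint64_t m = uint64_t(1) << (bits - 1);
    return int64_t((v & (2 * m - 1)) ^ m) - int64_t(m);
  }
  struct Fields { uint32_t lui, addi1, addi2, addi3; };

  inline Fields split(uint64_t target) {                    // cf. patch_addr_in_movptr
    const int64_t lower = sext(target, 28);                 // low 28 bits, sign-extended
    const int64_t upper = (int64_t(target) - lower) >> 28;  // absorbs the carry when bit 27 is set
    return { uint32_t(upper) & 0xfffff,
             uint32_t(uint64_t(lower) >> 16) & 0xfff,
             uint32_t(uint64_t(lower) >> 5)  & 0x7ff,
             uint32_t(lower) & 0x1f };
  }
  inline uint64_t rejoin(const Fields& f) {                 // cf. get_target_of_movptr
    return uint64_t(sext(f.lui, 20)   * (int64_t(1) << 28) +
                    sext(f.addi1, 12) * (int64_t(1) << 16) +
                    sext(f.addi2, 12) * (int64_t(1) << 5)  +
                    sext(f.addi3, 12));
  }
  inline void selfcheck() {
    for (uint64_t t : { uint64_t(0),
                        uint64_t(0x08765432),                // bit 27 set: exercises the carry
                        uint64_t(0x00007f1234567890ull) }) { // a typical 47-bit address
      assert(rejoin(split(t)) == t);
    }
  }
}
// -----------------------------------------------------------------------------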
-+ // No registers are killed, except tmp_reg -+ void check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset = noreg); ++void MacroAssembler::mv(Register Rd, Address dest) { ++ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); ++ code_section()->relocate(pc(), dest.rspec()); ++ movptr(Rd, dest.target()); ++} + -+ // The reset of the type cehck; must be wired to a corresponding fast path. -+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp_reg and tmp2_reg can be noreg, if no tmps are avaliable. -+ // Updates the sub's secondary super cache as necessary. -+ void check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure); ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruciton ++ // movptr instead of li ++ movptr(Rd, addr); ++} + -+ void check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success); ++void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { ++ if (src.is_register()) { ++ mv(Rd, src.as_register()); ++ } else { ++ mv(Rd, src.as_constant()); ++ } ++} + -+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { ++ andr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ // only if +VerifyOops -+ void verify_oop(Register reg, const char* s = "broken oop"); -+ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); ++void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { ++ orr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} -+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} ++void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { ++ xorr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++// Note: load_unsigned_short used to be called load_unsigned_word. ++int MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ int off = offset(); ++ lhu(dst, src); ++ return off; ++} + -+ // A more convenient access to fence for our purposes -+ // We used four bit to indicate the read and write bits in the predecessors and successors, -+ // and extended i for r, o for w if UseConservativeFence enabled. 
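// --- Editorial sketch (not part of the patch) --------------------------------
// The comment above, and the Membar_mask_bits encoding it describes, pack a
// barrier constraint into four bits: the upper two are the predecessor's
// (read, write) bits, the lower two the successor's, and UseConservativeFence
// widens r -> ir and w -> ow so device I/O is ordered as well (RISC-V fence
// sets are i/o/r/w, i.e. i = 8, o = 4, r = 2, w = 1). A tiny, hypothetical
// model of that split (illustrative names only):

#include <cassert>
#include <cstdint>

namespace membar_sketch {
  enum : uint32_t { kStoreStore = 0b0101, kLoadStore = 0b1001, kStoreLoad = 0b0110, kLoadLoad = 0b1010 };

  inline void to_pred_succ(uint32_t constraint, bool conservative, uint32_t& pred, uint32_t& succ) {
    pred = (constraint >> 2) & 0x3;     // cf. membar_mask_to_pred_succ
    succ = constraint & 0x3;
    if (conservative) {                 // r -> ir, w -> ow, rw -> iorw
      pred |= pred << 2;
      succ |= succ << 2;
    }
  }
  inline void selfcheck() {
    uint32_t pred = 0, succ = 0;
    to_pred_succ(kLoadStore, /*conservative=*/true, pred, succ);
    assert(pred == 0b1010 && succ == 0b0101);   // would emit "fence ir, ow"
  }
}
// -----------------------------------------------------------------------------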
-+ enum Membar_mask_bits { -+ StoreStore = 0b0101, // (pred = ow + succ = ow) -+ LoadStore = 0b1001, // (pred = ir + succ = ow) -+ StoreLoad = 0b0110, // (pred = ow + succ = ir) -+ LoadLoad = 0b1010, // (pred = ir + succ = ir) -+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) -+ }; ++int MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ int off = offset(); ++ lbu(dst, src); ++ return off; ++} + -+ void membar(uint32_t order_constraint); ++int MacroAssembler::load_signed_short(Register dst, Address src) { ++ int off = offset(); ++ lh(dst, src); ++ return off; ++} + -+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { -+ predecessor = (order_constraint >> 2) & 0x3; -+ successor = order_constraint & 0x3; ++int MacroAssembler::load_signed_byte(Register dst, Address src) { ++ int off = offset(); ++ lb(dst, src); ++ return off; ++} + -+ // extend rw -> iorw: -+ // 01(w) -> 0101(ow) -+ // 10(r) -> 1010(ir) -+ // 11(rw)-> 1111(iorw) -+ if (UseConservativeFence) { -+ predecessor |= predecessor << 2; -+ successor |= successor << 2; -+ } ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; ++ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; ++ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); + } ++} + -+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { -+ return ((predecessor & 0x3) << 2) | (successor & 0x3); ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); + } ++} + -+ // prints msg, dumps registers and stops execution -+ void stop(const char* msg); -+ -+ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++// reverse bytes in halfword in lower 16 bits and sign-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) ++void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ slli(Rd, Rs, 56); ++ srai(Rd, Rd, 48); // sign-extend ++ orr(Rd, Rd, tmp); ++} + -+ void unimplemented(const char* what = ""); ++// reverse bytes in lower word and sign-extend ++// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) ++void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_w_u(Rd, Rs, tmp1, tmp2); ++ slli(tmp2, Rd, 48); ++ srai(tmp2, tmp2, 32); // sign-extend ++ srli(Rd, Rd, 16); ++ orr(Rd, Rd, tmp2); ++} + -+ void should_not_reach_here() { stop("should not reach here"); } ++// reverse bytes in halfword in lower 16 bits and zero-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { ++ if (UseRVB) { ++ rev8(Rd, Rs); 
++ srli(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ andi(Rd, Rs, 0xFF); ++ slli(Rd, Rd, 8); ++ orr(Rd, Rd, tmp); ++} + -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ return RegisterOrConstant(tmp); ++// reverse bytes in halfwords in lower 32 bits and zero-extend ++// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zext_w(Rd, Rd); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ srli(tmp2, Rs, 16); ++ revb_h_h_u(tmp2, tmp2, tmp1); ++ revb_h_h_u(Rd, Rs, tmp1); ++ slli(tmp2, tmp2, 16); ++ orr(Rd, Rd, tmp2); ++} + -+ static address target_addr_for_insn(address insn_addr); ++// This method is only used for revb_h ++// Rd = Rs[47:0] Rs[55:48] Rs[63:56] ++void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1); ++ srli(tmp1, Rs, 48); ++ andi(tmp2, tmp1, 0xFF); ++ slli(tmp2, tmp2, 8); ++ srli(tmp1, tmp1, 8); ++ orr(tmp1, tmp1, tmp2); ++ slli(Rd, Rs, 16); ++ orr(Rd, Rd, tmp1); ++} + -+ // Required platform-specific helpers for Label::patch_instructions. -+ // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target) ; -+ void pd_patch_instruction(address branch, address target) { -+ pd_patch_instruction_size(branch, target); ++// reverse bytes in each halfword ++// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] ++void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ assert_different_registers(Rs, tmp1); ++ assert_different_registers(Rd, tmp1); ++ rev8(Rd, Rs); ++ zext_w(tmp1, Rd); ++ roriw(tmp1, tmp1, 16); ++ slli(tmp1, tmp1, 32); ++ srli(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zext_w(Rd, Rd); ++ orr(Rd, Rd, tmp1); ++ return; + } -+ static address pd_call_destination(address branch) { -+ return target_addr_for_insn(branch); ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_helper(Rd, Rs, tmp1, tmp2); ++ for (int i = 0; i < 3; ++i) { ++ revb_h_helper(Rd, Rd, tmp1, tmp2); + } ++} + -+ static int patch_oop(address insn_addr, address o); -+ address emit_trampoline_stub(int insts_call_instruction_offset, address target); -+ void emit_static_call_stub(); ++// reverse bytes in each word ++// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] ++void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb(Rd, Rs, tmp1, tmp2); ++ ror_imm(Rd, Rd, 32); ++} + -+ // The following 4 methods return the offset of the appropriate move instruction ++// reverse bytes in doubleword ++// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] ++void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ return; ++ 
} ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ andi(tmp1, Rs, 0xFF); ++ slli(tmp1, tmp1, 8); ++ for (int step = 8; step < 56; step += 8) { ++ srli(tmp2, Rs, step); ++ andi(tmp2, tmp2, 0xFF); ++ orr(tmp1, tmp1, tmp2); ++ slli(tmp1, tmp1, 8); ++ } ++ srli(Rd, Rs, 56); ++ andi(Rd, Rd, 0xFF); ++ orr(Rd, tmp1, Rd); ++} + -+ // Support for fast byte/short loading with zero extension (depending on particular CPU) -+ int load_unsigned_byte(Register dst, Address src); -+ int load_unsigned_short(Register dst, Address src); ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseRVB) { ++ rori(dst, src, shift); ++ return; ++ } + -+ // Support for fast byte/short loading with sign extension (depending on particular CPU) -+ int load_signed_byte(Register dst, Address src); -+ int load_signed_short(Register dst, Address src); ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} + -+ // Load and store values by size and signed-ness -+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); -+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { ++ if (is_imm_in_range(imm, 12, 0)) { ++ and_imm12(Rd, Rn, imm); ++ } else { ++ assert_different_registers(Rn, tmp); ++ li(tmp, imm); ++ andr(Rd, Rn, tmp); ++ } ++} + -+ public: -+ // enum used for riscv--x86 linkage to define return type of x86 function -+ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; ++void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { ++ ld(tmp1, adr); ++ if (src.is_register()) { ++ orr(tmp1, tmp1, src.as_register()); ++ } else { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { ++ ori(tmp1, tmp1, src.as_constant()); ++ } else { ++ assert_different_registers(tmp1, tmp2); ++ li(tmp2, src.as_constant()); ++ orr(tmp1, tmp1, tmp2); ++ } ++ } ++ sd(tmp1, adr); ++} + -+ // Standard pseudoinstruction -+ void nop(); -+ void mv(Register Rd, Register Rs) ; -+ void notr(Register Rd, Register Rs); -+ void neg(Register Rd, Register Rs); -+ void negw(Register Rd, Register Rs); -+ void sext_w(Register Rd, Register Rs); -+ void zext_b(Register Rd, Register Rs); -+ void seqz(Register Rd, Register Rs); // set if = zero -+ void snez(Register Rd, Register Rs); // set if != zero -+ void sltz(Register Rd, Register Rs); // set if < zero -+ void sgtz(Register Rd, Register Rs); // set if > zero ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { ++ if (UseCompressedClassPointers) { ++ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ if (CompressedKlassPointers::base() == NULL) { ++ slli(tmp, tmp, CompressedKlassPointers::shift()); ++ beq(trial_klass, tmp, L); ++ return; ++ } ++ decode_klass_not_null(tmp); ++ } else { ++ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ } ++ beq(trial_klass, tmp, L); ++} + -+ // Float pseudoinstruction -+ void fmv_s(FloatRegister Rd, FloatRegister Rs); -+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value -+ void fneg_s(FloatRegister Rd, FloatRegister Rs); ++// Move an oop into a 
register. immediate is true if we want ++// immediate instructions and nmethod entry barriers are not enabled. ++// i.e. we are not going to patch this instruction while the code is being ++// executed by another thread. ++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); + -+ // Double pseudoinstruction -+ void fmv_d(FloatRegister Rd, FloatRegister Rs); -+ void fabs_d(FloatRegister Rd, FloatRegister Rs); -+ void fneg_d(FloatRegister Rd, FloatRegister Rs); ++ // nmethod entry barrier necessitate using the constant pool. They have to be ++ // ordered with respected to oop access. ++ // Using immediate literals would necessitate fence.i. ++ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ ld_constant(dst, Address(dummy, rspec)); ++ } else ++ mv(dst, Address((address)obj, rspec)); ++} + -+ // Pseudoinstruction for control and status register -+ void rdinstret(Register Rd); // read instruction-retired counter -+ void rdcycle(Register Rd); // read cycle counter -+ void rdtime(Register Rd); // read time -+ void csrr(Register Rd, unsigned csr); // read csr -+ void csrw(unsigned csr, Register Rs); // write csr -+ void csrs(unsigned csr, Register Rs); // set bits in csr -+ void csrc(unsigned csr, Register Rs); // clear bits in csr -+ void csrwi(unsigned csr, unsigned imm); -+ void csrsi(unsigned csr, unsigned imm); -+ void csrci(unsigned csr, unsigned imm); -+ void frcsr(Register Rd); // read float-point csr -+ void fscsr(Register Rd, Register Rs); // swap float-point csr -+ void fscsr(Register Rs); // write float-point csr -+ void frrm(Register Rd); // read float-point rounding mode -+ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode -+ void fsrm(Register Rs); // write float-point rounding mode -+ void fsrmi(Register Rd, unsigned imm); -+ void fsrmi(unsigned imm); -+ void frflags(Register Rd); // read float-point exception flags -+ void fsflags(Register Rd, Register Rs); // swap float-point exception flags -+ void fsflags(Register Rs); // write float-point exception flags -+ void fsflagsi(Register Rd, unsigned imm); -+ void fsflagsi(unsigned imm); ++// Move a metadata address into a register. 
++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } else { ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ mv(dst, Address((address)obj, rspec)); ++} + -+ void beqz(Register Rs, const address &dest); -+ void blez(Register Rs, const address &dest); -+ void bgez(Register Rs, const address &dest); -+ void bltz(Register Rs, const address &dest); -+ void bgtz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); -+ void la(Register Rd, Label &label); -+ void la(Register Rd, const address &dest); -+ void la(Register Rd, const Address &adr); -+ //label -+ void beqz(Register Rs, Label &l, bool is_far = false); -+ void bnez(Register Rs, Label &l, bool is_far = false); -+ void blez(Register Rs, Label &l, bool is_far = false); -+ void bgez(Register Rs, Label &l, bool is_far = false); -+ void bltz(Register Rs, Label &l, bool is_far = false); -+ void bgtz(Register Rs, Label &l, bool is_far = false); -+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ -+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } -+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } -+ void push_reg(Register Rs); -+ void pop_reg(Register Rd); -+ int push_reg(unsigned int bitset, Register stack); -+ int pop_reg(unsigned int bitset, Register stack); -+ static RegSet call_clobbered_registers(); -+ void push_call_clobbered_registers(); -+ void pop_call_clobbered_registers(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); -+ -+ // if heap base register is used - reinit it with the correct value -+ void reinit_heapbase(); ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. 
++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, t0); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ mv(t0, os::vm_page_size()); ++ Label loop; ++ bind(loop); ++ sub(tmp, sp, t0); ++ subw(size, size, t0); ++ sd(size, Address(tmp)); ++ bgtz(size, loop); + -+ void bind(Label& L) { -+ Assembler::bind(L); -+ // fences across basic blocks should not be merged -+ code()->clear_last_insn(); ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub(tmp, tmp, os::vm_page_size()); ++ sd(size, Address(tmp, 0)); + } ++} + -+ // mv -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ -+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } -+ -+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } ++SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); ++ int32_t offset = 0; ++ _masm = masm; ++ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); ++ _masm->lbu(t0, Address(t0, offset)); ++ _masm->beqz(t0, _label); ++} + -+ void mv(Register Rd, Address dest); -+ void mv(Register Rd, RegisterOrConstant src); ++SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); ++ _masm->bind(_label); ++ _masm = NULL; ++} + -+ // logic -+ void andrw(Register Rd, Register Rs1, Register Rs2); -+ void orrw(Register Rd, Register Rs1, Register Rs2); -+ void xorrw(Register Rd, Register Rs1, Register Rs2); ++void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld(dst, Address(xmethod, Method::const_offset())); ++ ld(dst, Address(dst, ConstMethod::constants_offset())); ++ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); ++ ld(dst, Address(dst, mirror_offset)); ++ resolve_oop_handle(dst, tmp); ++} + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
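// --- Editorial sketch (not part of the patch) --------------------------------
// "An indirection" concretely: an OopHandle is the address of a root slot that
// holds the oop, so resolving it is a single dependent load. The real load is
// issued through access_load_at so the GC's IN_NATIVE barriers apply. A
// hypothetical two-line model (illustrative names only):
//
//   struct Oop;                              // stand-in for a Java heap object
//   using OopHandleSlot = Oop* const*;       // handle = address of the slot holding the oop
//   inline Oop* resolve(OopHandleSlot h) { return *h; }
// -----------------------------------------------------------------------------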
++ assert_different_registers(result, tmp); ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); ++} + -+ // support for argument shuffling -+ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset); -+ -+ void rt_call(address dest, Register tmp = t0); ++// ((WeakHandle)result).resolve() ++void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { ++ assert_different_registers(result, tmp); ++ Label resolved; + -+ // revb -+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend -+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend -+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend -+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend -+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower -+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword -+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word -+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword ++ // A null weak handle resolves to null. ++ beqz(result, resolved); + -+ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0); -+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); ++ // Only 64 bit platforms support GCs that require a tmp register ++ // Only IN_HEAP loads require a thread_tmp register ++ // WeakHandle::resolve is an indirection like jweak. 
++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ result, Address(result), tmp, noreg /* tmp_thread */); ++ bind(resolved); ++} + -+ // Support for serializing memory accesses between threads -+ void serialize_memory(Register thread, Register tmp1, Register tmp2); ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; -+ void cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool = false); -+ void cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result); -+ void cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3); -+ void cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3); -+ void weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3); ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ ld(zr, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} + -+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ void atomic_xchg(Register prev, Register newv, Register addr); -+ void atomic_xchgw(Register prev, Register newv, Register addr); -+ void atomic_xchgal(Register prev, Register newv, Register addr); -+ void 
atomic_xchgalw(Register prev, Register newv, Register addr); -+ void atomic_xchgwu(Register prev, Register newv, Register addr); -+ void atomic_xchgalwu(Register prev, Register newv, Register addr); ++// Algorithm must match CompressedOops::encode. ++void MacroAssembler::encode_heap_oop(Register d, Register s) { ++ verify_oop(s, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli(d, s, LogMinObjAlignmentInBytes); ++ } else { ++ mv(d, s); ++ } ++ } else { ++ Label notNull; ++ sub(d, s, xheapbase); ++ bgez(d, notNull); ++ mv(d, zr); ++ bind(notNull); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli(d, d, CompressedOops::shift()); ++ } ++ } ++} + -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be t0 or t1 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg); ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ } ++} + -+ static bool far_branches() { -+ return ReservedCodeCacheSize > branch_range; ++void MacroAssembler::store_klass(Register dst, Register src) { ++ // FIXME: Should this be a store release? concurrent gcs assumes ++ // klass length is valid if klass field is not null. ++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } ++} + -+ //atomic -+ void atomic_incw(Register counter_addr, Register tmp1); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ // Store to klass gap in destination ++ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); + } ++} + -+ // Jumps that can reach anywhere in the code cache. -+ // Trashes tmp. 
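// --- Editorial sketch (not part of the patch) --------------------------------
// encode_heap_oop above is the usual compressed-oops arithmetic: subtract the
// heap base and shift right by the alignment (the assembly folds the NULL case
// by clamping a negative difference to zero); the decode helpers further down
// reverse it with a shift-and-add off xheapbase. A minimal model with assumed,
// illustrative parameters (the real base and shift are chosen by the VM at
// startup, not fixed as below):

#include <cassert>
#include <cstdint>

namespace coops_sketch {
  constexpr uint64_t kHeapBase = 0x0000000800000000ull;   // assumed: heap mapped at 32 GB
  constexpr unsigned kShift    = 3;                       // assumed: 8-byte object alignment

  inline uint32_t encode(uint64_t oop) {                  // cf. encode_heap_oop
    if (oop < kHeapBase) return 0;                        // NULL (and anything below base) -> 0
    return uint32_t((oop - kHeapBase) >> kShift);
  }
  inline uint64_t decode_not_null(uint32_t narrow) {      // cf. decode_heap_oop_not_null
    return kHeapBase + (uint64_t(narrow) << kShift);
  }
  inline void selfcheck() {
    const uint64_t oop = kHeapBase + (uint64_t(0x12345678) << kShift);
    assert(decode_not_null(encode(oop)) == oop);
    assert(encode(0) == 0);
  }
}
// -----------------------------------------------------------------------------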
-+ void far_call(Address entry, Register tmp = t0); -+ void far_jump(Address entry, Register tmp = t0); ++void MacroAssembler::decode_klass_not_null(Register r) { ++ decode_klass_not_null(r, r); ++} + -+ static int far_branch_size() { -+ if (far_branches()) { -+ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (CompressedKlassPointers::base() == NULL) { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ slli(dst, src, LogKlassAlignmentInBytes); + } else { -+ return 4; ++ mv(dst, src); + } ++ return; + } + -+ void load_byte_map_base(Register reg); -+ -+ void bang_stack_with_offset(int offset) { -+ // stack grows down, caller passes positive offset -+ assert(offset > 0, "must bang with negative offset"); -+ sub(t1, sp, offset); -+ sd(zr, Address(t1)); ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; + } + -+ void la_patchable(Register reg1, const Address &dest, int32_t &offset); ++ assert_different_registers(src, xbase); ++ li(xbase, (uintptr_t)CompressedKlassPointers::base()); + -+ virtual void _call_Unimplemented(address call_site) { -+ mv(t1, call_site); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert_different_registers(t0, xbase); ++ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); ++ } else { ++ add(dst, xbase, src); + } -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+#ifdef COMPILER2 -+ void spill(Register Rx, bool is64, int offset) { -+ is64 ? sd(Rx, Address(sp, offset)) -+ : sw(Rx, Address(sp, offset)); -+ } ++ if (xbase == xheapbase) { reinit_heapbase(); } ++} + -+ void spill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? fsd(Rx, Address(sp, offset)) -+ : fsw(Rx, Address(sp, offset)); -+ } ++void MacroAssembler::encode_klass_not_null(Register r) { ++ encode_klass_not_null(r, r); ++} + -+ void spill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(Vx, t0); ++void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (CompressedKlassPointers::base() == NULL) { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ srli(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ mv(dst, src); ++ } ++ return; + } + -+ void unspill(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lw(Rx, Address(sp, offset)); ++ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && ++ CompressedKlassPointers::shift() == 0) { ++ zero_extend(dst, src, 32); ++ return; + } + -+ void unspillu(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lwu(Rx, Address(sp, offset)); ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; + } + -+ void unspill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? 
fld(Rx, Address(sp, offset)) -+ : flw(Rx, Address(sp, offset)); ++ assert_different_registers(src, xbase); ++ li(xbase, (intptr_t)CompressedKlassPointers::base()); ++ sub(dst, src, xbase); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ srli(dst, dst, LogKlassAlignmentInBytes); ++ } ++ if (xbase == xheapbase) { ++ reinit_heapbase(); + } ++} + -+ void unspill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(Vx, t0); ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ decode_heap_oop_not_null(r, r); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ slli(dst, src, LogMinObjAlignmentInBytes); ++ if (CompressedOops::base() != NULL) { ++ add(dst, xheapbase, dst); ++ } ++ } else { ++ assert(CompressedOops::base() == NULL, "sanity"); ++ mv(dst, src); + } ++} + -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, -+ int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); ++void MacroAssembler::decode_heap_oop(Register d, Register s) { ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0 || d != s) { ++ slli(d, s, CompressedOops::shift()); ++ } ++ } else { ++ Label done; ++ mv(d, s); ++ beqz(s, done); ++ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); ++ bind(done); + } ++ verify_oop(d, "broken oop in decode_heap_oop"); ++} + -+#endif // COMPILER2 ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ // Frame creation and destruction shared between JITs. -+ void build_frame(int framesize); -+ void remove_frame(int framesize); ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ void reserved_stack_check(); -+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); -+ void read_polling_page(Register r, address page, relocInfo::relocType rtype); -+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ // Return: the call PC -+ address trampoline_call(Address entry); -+ address ic_call(address entry, jint method_index = 0); -+ // Support for memory inc/dec -+ // n.b. increment/decrement calls with an Address destination will -+ // need to use a scratch register to load the value to be -+ // incremented. increment/decrement calls which add or subtract a -+ // constant value other than sign-extended 12-bit immediate will need -+ // to use a 2nd scratch register to hold the constant. so, an address -+ // increment/decrement may trash both t0 and t1. 
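// --- Editorial sketch (not part of the patch) --------------------------------
// The corrected_idivl / corrected_idivq implementations later in this file need
// no fix-up branches: RISC-V's div/rem already return the Java-specified result
// for the MIN_VALUE / -1 corner case (the quotient wraps back to MIN_VALUE and
// the remainder is 0), and integer division never traps. A reference model of
// those semantics, computed in wider arithmetic so the C++ avoids
// signed-overflow UB (illustrative names; the zero-divisor case is handled
// separately by the VM):

#include <cassert>
#include <cstdint>

namespace idiv_sketch {
  inline int32_t java_idiv(int32_t a, int32_t b) {        // requires b != 0
    return int32_t(int64_t(a) / int64_t(b));              // truncating division; two's-complement narrowing
  }
  inline int32_t java_irem(int32_t a, int32_t b) {        // requires b != 0
    return int32_t(int64_t(a) % int64_t(b));
  }
  inline void selfcheck() {
    assert(java_idiv(INT32_MIN, -1) == INT32_MIN);        // wraps, does not trap
    assert(java_irem(INT32_MIN, -1) == 0);
    assert(java_idiv(-7, 2) == -3 && java_irem(-7, 2) == -1);  // truncation toward zero
  }
}
// -----------------------------------------------------------------------------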
-+ -+ void increment(const Address dst, int64_t value = 1); -+ void incrementw(const Address dst, int32_t value = 1); -+ -+ void decrement(const Address dst, int64_t value = 1); -+ void decrementw(const Address dst, int32_t value = 1); -+ void cmpptr(Register src1, Address src2, Label& equal); -+ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop -+ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+#ifdef COMPILER2 -+ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min); ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++} + -+ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size); ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} + -+ void string_equals(Register a1, Register a2, Register result, Register cnt1, -+ int elem_size); -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, int ae); -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ void compute_index(Register str1, Register trailing_zero, Register match_mask, -+ Register result, Register char_tmp, Register tmp, -+ bool haystack_isL); -+ void compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2); -+ void cad(Register dst, Register src1, Register src2, Register carry); -+ void cadc(Register dst, Register src1, Register src2, Register carry); -+ void adc(Register dst, Register src1, Register src2, Register carry); -+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry = t0); -+ void mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2); -+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, 
-+ Register tmp6, Register product_hi); -+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi); -+#endif // COMPILER2 -+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ -+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); -+ void zero_words(Register base, uint64_t cnt); -+ address zero_words(Register ptr, Register cnt); -+ void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp1); ++int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java idiv and irem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // shift left by shamt and add -+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; ++ int idivl_offset = offset(); ++ if (!want_remainder) { ++ divw(result, rs1, rs2); ++ } else { ++ remw(result, rs1, rs2); // result = rs1 % rs2; ++ } ++ return idivl_offset; ++} + -+ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); ++int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java ldiv and lrem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // cmp -+ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false); -+ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false); ++ int idivq_offset = offset(); ++ if (!want_remainder) { ++ div(result, rs1, rs2); ++ } else { ++ rem(result, rs1, rs2); // result = rs1 % rs2; ++ } ++ return idivq_offset; ++} + -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); ++// Look up the method for a megamorpic invkkeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
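// --- Editorial sketch (not part of the patch) --------------------------------
// Before the assembly below, a grossly simplified model of the walk it
// performs: after the embedded vtable, a klass carries a null-terminated array
// of itable entries, each naming an interface and (in the real layout) a byte
// offset to that interface's method array within the same klass. The scan
// stops on a match (return the itable_index-th method) or on a null entry
// (interface not implemented). Names here are illustrative, not HotSpot types:

#include <cstddef>

namespace itable_sketch {
  struct Method;
  struct Interface;
  struct ItableEntry { const Interface* interface; const Method* const* methods; };

  inline const Method* lookup(const ItableEntry* scan, const Interface* iface, std::size_t itable_index) {
    for (; scan->interface != nullptr; ++scan) {
      if (scan->interface == iface) {
        return scan->methods[itable_index];    // "got a hit"
      }
    }
    return nullptr;                            // corresponds to branching to L_no_such_interface
  }
}
// -----------------------------------------------------------------------------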
++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_tmp); ++ assert_different_registers(method_result, intf_klass, scan_tmp); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when mehtid isn't needed"); ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must be same register for non-constant itable index as for method"); + -+ // intrinsic methods implemented by vector instructions -+ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm); ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable). ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size_in_bytes(); ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); + -+ void clear_array_v(Register base, Register cnt); -+ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp); -+ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp); -+ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp); ++ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); + -+ address has_negatives_v(Register ary, Register len, Register result, Register tmp); -+#endif ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); ++ add(scan_tmp, scan_tmp, vtable_base); + -+ // Here the float instructions with safe deal with some exceptions. -+ // e.g. convert from NaN, +Inf, -Inf to int, float, double -+ // will trigger exception, we need to deal with these situations -+ // to get correct results. -+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_register()) { ++ slli(t0, itable_index.as_register(), 3); ++ } else { ++ li(t0, itable_index.as_constant() << 3); ++ } ++ add(recv_klass, recv_klass, t0); ++ if (itentry_off) { ++ add(recv_klass, recv_klass, itentry_off); ++ } ++ } + -+ // vector load/store unit-stride instructions -+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vle64_v(vd, base, vm); -+ break; -+ case Assembler::e32: -+ vle32_v(vd, base, vm); -+ break; -+ case Assembler::e16: -+ vle16_v(vd, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vle8_v(vd, base, vm); -+ break; -+ } ++ Label search, found_method; ++ ++ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doens't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface, /* is_far */ true); ++ addi(scan_tmp, scan_tmp, scan_step); ++ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ ++ // Got a hit. ++ if (return_method) { ++ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); ++ add(method_result, recv_klass, scan_tmp); ++ ld(method_result, Address(method_result)); + } ++} + -+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vse64_v(store_data, base, vm); -+ break; -+ case Assembler::e32: -+ vse32_v(store_data, base, vm); -+ break; -+ case Assembler::e16: -+ vse16_v(store_data, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vse8_v(store_data, base, vm); -+ break; -+ } ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, ++ "adjust the scaling in the code below"); ++ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); ++ ++ if (vtable_index.is_register()) { ++ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); ++ ld(method_result, Address(method_result, vtable_offset_in_bytes)); ++ } else { ++ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; ++ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } ++} + -+ static const int zero_words_block_size; ++void MacroAssembler::membar(uint32_t order_constraint) { ++ address prev = pc() - NativeMembar::instruction_size; ++ address last = code()->last_insn(); + -+ void cast_primitive_type(BasicType type, Register Rt) { -+ switch (type) { -+ case T_BOOLEAN: -+ sltu(Rt, zr, Rt); -+ break; -+ case T_CHAR : -+ zero_extend(Rt, Rt, 16); -+ break; -+ case T_BYTE : -+ sign_extend(Rt, Rt, 8); -+ break; -+ case T_SHORT : -+ sign_extend(Rt, Rt, 16); -+ break; -+ case T_INT : -+ addw(Rt, Rt, zr); -+ break; -+ case T_LONG : /* nothing to do */ break; -+ case T_VOID : /* nothing to do */ break; -+ case T_FLOAT : /* nothing to do */ break; -+ case T_DOUBLE : /* 
nothing to do */ break; -+ default: ShouldNotReachHere(); -+ } ++ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { ++ NativeMembar *bar = NativeMembar_at(prev); ++ // We are merging two memory barrier instructions. On RISCV we ++ // can do this simply by ORing them together. ++ bar->set_kind(bar->get_kind() | order_constraint); ++ BLOCK_COMMENT("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; ++ ++ membar_mask_to_pred_succ(order_constraint, predecessor, successor); ++ fence(predecessor, successor); + } ++} + -+ // float cmp with unordered_result -+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); -+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++// Form an addres from base + offset in Rd. Rd my or may not ++// actually be used: you must use the Address that is returned. It ++// is up to you to ensure that the shift provided mathces the size ++// of your data. ++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); ++ } + -+ // Zero/Sign-extend -+ void zero_extend(Register dst, Register src, int bits); -+ void sign_extend(Register dst, Register src, int bits); ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} + -+ // compare src1 and src2 and get -1/0/1 in dst. -+ // if [src1 > src2], dst = 1; -+ // if [src1 == src2], dst = 0; -+ // if [src1 < src2], dst = -1; -+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} + -+ void load_constant_pool_cache(Register cpool, Register method); ++void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { ++ ld(t0, Address(xthread, JavaThread::polling_word_offset())); ++ if (acquire) { ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ } ++ if (at_return) { ++ bgtu(in_nmethod ? 
sp : fp, t0, slow_path, true /* is_far */); ++ } else { ++ andi(t0, t0, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path, true /* is_far */); ++ } ++} + -+ void load_max_stack(Register dst, Register method); ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ // oldv holds comparison value ++ // newv holds value to write in exchange ++ // addr identifies memory word to compare against/update ++ Label retry_load, nope; ++ bind(retry_load); ++ // Load reserved from the memory location ++ lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect ++ bne(tmp, oldv, nope); ++ // If the store conditional succeeds, tmp will be zero ++ sc_d(tmp, newv, addr, Assembler::rl); ++ beqz(tmp, succeed); ++ // Retry only when the store conditional failed ++ j(retry_load); + -+private: -+ void load_prototype_header(Register dst, Register src); -+ void repne_scan(Register addr, Register value, Register count, Register tmp); ++ bind(nope); ++ membar(AnyAny); ++ mv(oldv, tmp); ++ if (fail != NULL) { ++ j(*fail); ++ } ++} + -+#ifdef ASSERT -+ // Macro short-hand support to clean-up after a failed call to trampoline -+ // call generation (see trampoline_call() below), when a set of Labels must -+ // be reset (before returning). -+#define reset_labels1(L1) L1.reset() -+#define reset_labels2(L1, L2) L1.reset(); L2.reset() -+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) -+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) -+#endif ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); ++ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++} + -+ // Return true if an address is within the 48-bit RISCV64 address space. 
-+ bool is_valid_riscv64_address(address addr) { -+ // sv48: must have bits 63-48 all equal to bit 47 -+ return ((uintptr_t)addr >> 47) == 0; ++void MacroAssembler::load_reserved(Register addr, ++ enum operand_size size, ++ Assembler::Aqrl acquire) { ++ switch (size) { ++ case int64: ++ lr_d(t0, addr, acquire); ++ break; ++ case int32: ++ lr_w(t0, addr, acquire); ++ break; ++ case uint32: ++ lr_w(t0, addr, acquire); ++ zero_extend(t0, t0, 32); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ void ld_constant(Register dest, const Address &const_addr) { -+ if (NearCpool) { -+ ld(dest, const_addr); -+ } else { -+ int32_t offset = 0; -+ la_patchable(dest, InternalAddress(const_addr.target()), offset); -+ ld(dest, Address(dest, offset)); -+ } ++void MacroAssembler::store_conditional(Register addr, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ sc_d(t0, new_val, addr, release); ++ break; ++ case int32: ++ case uint32: ++ sc_w(t0, new_val, addr, release); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ int bitset_to_regs(unsigned int bitset, unsigned char* regs); -+ Address add_memory_helper(const Address dst); + -+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); -+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3) { ++ assert(size == int8 || size == int16, "unsupported operand size"); + -+#ifdef COMPILER2 -+ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE); -+#endif // COMPILER2 -+}; ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; + -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++ andi(shift, addr, 3); ++ slli(shift, shift, 3); + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. -+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++ andi(aligned_addr, addr, ~3); + -+ public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); -+}; -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -new file mode 100644 -index 000000000..fc2b191c0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ if (size == int8) { ++ addi(mask, zr, 0xff); ++ } else { ++ // size == int16 case ++ addi(mask, zr, -1); ++ zero_extend(mask, mask, 16); ++ } ++ sll(mask, mask, shift); + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++ xori(not_mask, mask, -1); + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -new file mode 100644 -index 000000000..d049193d4 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,440 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ sll(expected, expected, shift); ++ andr(expected, expected, mask); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "classfile/javaClasses.inline.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/flags/flagSetting.hpp" -+#include "runtime/frame.inline.hpp" ++ sll(new_val, new_val, shift); ++ andr(new_val, new_val, mask); ++} + -+#define __ _masm-> ++// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// which are forced to work with 4-byte aligned address. 
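Editorial sketch (not part of the patch): the routine below implements a byte/short compare-and-swap on top of word-sized lr.w/sc.w by shifting and masking within the naturally aligned 32-bit word. The same idea in portable C++, where word_cas32 is a hypothetical stand-in for the lr.w/sc.w attempt:

  #include <cstdint>

  // Hypothetical 32-bit CAS (one lr.w/sc.w attempt, or a full CAS); returns true on success.
  bool word_cas32(uint32_t* p, uint32_t expected, uint32_t desired);

  // CAS a single byte by CAS-ing the aligned 32-bit word that contains it.
  bool cas_byte(uint8_t* addr, uint8_t expected, uint8_t desired) {
    uint32_t* aligned = (uint32_t*)((uintptr_t)addr & ~(uintptr_t)3);
    int shift = ((uintptr_t)addr & 3) * 8;               // byte lane; RISC-V is little-endian
    uint32_t mask = 0xffu << shift;
    for (;;) {
      uint32_t old_word = *aligned;
      if ((uint8_t)((old_word & mask) >> shift) != expected) {
        return false;                                    // like the 'fail' path below
      }
      uint32_t new_word = (old_word & ~mask) | ((uint32_t)desired << shift);
      if (word_cas32(aligned, old_word, new_word)) {
        return true;
      }
      // A neighbouring byte changed underneath us: retry, as the lr.w/sc.w loop does.
    }
  }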
++void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++ Label retry, fail, done; + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ bind(retry); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { -+ if (VerifyMethodHandles) { -+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), -+ "MH argument is a Class"); -+ } -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); -+} ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, retry); + -+#ifdef ASSERT -+static int check_nonzero(const char* xname, int x) { -+ assert(x != 0, "%s should be nonzero", xname); -+ return x; -+} -+#define NONZERO(x) check_nonzero(#x, x) -+#else //ASSERT -+#define NONZERO(x) (x) -+#endif //PRODUCT ++ if (result_as_bool) { ++ addi(result, zr, 1); ++ j(done); + -+#ifdef ASSERT -+void MethodHandles::verify_klass(MacroAssembler* _masm, -+ Register obj, SystemDictionary::WKID klass_id, -+ const char* error_message) { -+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); -+ Klass* klass = SystemDictionary::well_known_klass(klass_id); -+ Register temp = t1; -+ Register temp2 = t0; // used by MacroAssembler::cmpptr -+ Label L_ok, L_bad; -+ BLOCK_COMMENT("verify_klass {"); -+ __ verify_oop(obj); -+ __ beqz(obj, L_bad); -+ __ push_reg(RegSet::of(temp, temp2), sp); -+ __ load_klass(temp, obj); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ intptr_t super_check_offset = klass->super_check_offset(); -+ __ ld(temp, Address(temp, super_check_offset)); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ __ bind(L_bad); -+ __ stop(error_message); -+ __ BIND(L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ BLOCK_COMMENT("} verify_klass"); ++ bind(fail); ++ mv(result, zr); ++ ++ bind(done); ++ } else { ++ andr(tmp, old, mask); ++ ++ bind(fail); ++ srl(result, tmp, shift); ++ ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } ++ } +} + -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } ++// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement ++// the weak CAS stuff. The major difference is that it just failed when store conditional ++// failed. 
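Editorial note (not part of the patch): "weak" has the usual LL/SC meaning here, namely that a single lr.w/sc.w attempt may fail spuriously even though the expected value was seen, so the caller must provide its own retry loop. The behaviour is analogous to std::atomic's compare_exchange_weak (standard C++, not this patch's API):

  #include <atomic>

  // Typical caller-side retry loop around a weak CAS.
  void increment(std::atomic<int>& counter) {
    int observed = counter.load(std::memory_order_relaxed);
    // compare_exchange_weak may fail spuriously; on failure it refreshes 'observed'
    // with the current value, so the loop simply tries again.
    while (!counter.compare_exchange_weak(observed, observed + 1)) {
      // retry with the refreshed 'observed'
    }
  }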
++void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+#endif //ASSERT ++ Label succ, fail, done; + -+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry) { -+ assert(method == xmethod, "interpreter calling convention"); -+ Label L_no_such_method; -+ __ beqz(xmethod, L_no_such_method); -+ __ verify_method_ptr(method); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ beqz(tmp, succ); + -+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ __ beqz(t0, run_compiled_code); -+ __ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ __ jr(t0); -+ __ BIND(run_compiled_code); -+ } ++ bind(fail); ++ addi(result, zr, 1); ++ j(done); + -+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : -+ Method::from_interpreted_offset(); -+ __ ld(t0,Address(method, entry_offset)); -+ __ jr(t0); -+ __ bind(L_no_such_method); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++ bind(succ); ++ mv(result, zr); ++ ++ bind(done); +} + -+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry) { -+ BLOCK_COMMENT("jump_to_lambda_form {"); -+ // This is the initial entry point of a lazy method handle. -+ // After type checking, it picks up the invoker from the LambdaForm. 
-+ assert_different_registers(recv, method_temp, temp2); -+ assert(recv != noreg, "required register"); -+ assert(method_temp == xmethod, "required register for loading method"); ++void MacroAssembler::cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool) { ++ assert(size != int8 && size != int16, "unsupported operand size"); + -+ // Load the invoker, as MH -> MH.form -> LF.vmentry -+ __ verify_oop(recv); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); -+ __ verify_oop(method_temp); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ Label retry_load, done, ne_done; ++ bind(retry_load); ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, ne_done); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, retry_load); + -+ if (VerifyMethodHandles && !for_compiler_entry) { -+ // make sure recv is already on stack -+ __ ld(temp2, Address(method_temp, Method::const_offset())); -+ __ load_sized_value(temp2, -+ Address(temp2, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ Label L; -+ __ ld(t0, __ argument_address(temp2, -1)); -+ __ oop_equal(recv, t0, L); -+ __ ld(x10, __ argument_address(temp2, -1)); -+ __ ebreak(); -+ __ BIND(L); ++ // equal, succeed ++ if (result_as_bool) { ++ li(result, 1); ++ } else { ++ mv(result, expected); + } ++ j(done); + -+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); -+ BLOCK_COMMENT("} jump_to_lambda_form"); ++ // not equal, failed ++ bind(ne_done); ++ if (result_as_bool) { ++ mv(result, zr); ++ } else { ++ mv(result, t0); ++ } ++ ++ bind(done); +} + -+// Code generation -+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, -+ vmIntrinsics::ID iid) { -+ const bool not_for_compiler_entry = false; // this is the interpreter entry -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ if (iid == vmIntrinsics::_invokeGeneric || -+ iid == vmIntrinsics::_compiledLambdaForm) { -+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. -+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. -+ // They all allow an appendix argument. 
-+ __ ebreak(); // empty stubs make SG sick -+ return NULL; -+ } -+ -+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) -+ // xmethod: Method* -+ // x13: argument locator (parameter slot count, added to sp) -+ // x11: used as temp to hold mh or receiver -+ Register argp = x13; // argument list ptr, live on error paths -+ Register mh = x11; // MH receiver; dies quickly and is recycled ++void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result) { ++ Label fail, done, sc_done; ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, fail); ++ store_conditional(addr, new_val, size, release); ++ beqz(t0, sc_done); + -+ // here's where control starts out: -+ __ align(CodeEntryAlignment); -+ address entry_point = __ pc(); ++ // fail ++ bind(fail); ++ li(result, 1); ++ j(done); + -+ if (VerifyMethodHandles) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); ++ bind(done); ++} + -+ Label L; -+ BLOCK_COMMENT("verify_intrinsic_id {"); -+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); -+ __ mv(t1, (int) iid); -+ __ beq(t0, t1, L); -+ if (iid == vmIntrinsics::_linkToVirtual || -+ iid == vmIntrinsics::_linkToSpecial) { -+ // could do this for all kinds, but would explode assembly code size -+ trace_method_handle(_masm, "bad Method*::intrinsic_id"); -+ } -+ __ ebreak(); -+ __ bind(L); -+ BLOCK_COMMENT("} verify_intrinsic_id"); -+ } ++#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ if (incr.is_register()) { \ ++ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } else { \ ++ mv(t0, incr.as_constant()); \ ++ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } \ ++ return; \ ++} + -+ // First task: Find out how big the argument list is. -+ Address x13_first_arg_addr; -+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); -+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); -+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ __ ld(argp, Address(xmethod, Method::const_offset())); -+ __ load_sized_value(argp, -+ Address(argp, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ x13_first_arg_addr = __ argument_address(argp, -1); -+ } else { -+ DEBUG_ONLY(argp = noreg); -+ } ++ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) ++ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) + -+ if (!is_signature_polymorphic_static(iid)) { -+ __ ld(mh, x13_first_arg_addr); -+ DEBUG_ONLY(argp = noreg); -+ } ++#undef ATOMIC_OP + -+ // x13_first_arg_addr is live! ++#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ ++ prev = prev->is_valid() ? 
prev : zr; \ ++ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ return; \ ++} + -+ trace_method_handle_interpreter_entry(_masm, iid); -+ if (iid == vmIntrinsics::_invokeBasic) { -+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) ++ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) + -+ } else { -+ // Adjust argument list by popping the trailing MemberName argument. -+ Register recv = noreg; -+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. -+ __ ld(recv = x12, x13_first_arg_addr); -+ } -+ DEBUG_ONLY(argp = noreg); -+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now -+ __ pop_reg(xmember); // extract last argument -+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); -+ } ++#undef ATOMIC_XCHG + -+ return entry_point; ++#define ATOMIC_XCHGU(OP1, OP2) \ ++void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ ++ atomic_##OP2(prev, newv, addr); \ ++ zero_extend(prev, prev, 32); \ ++ return; \ +} + ++ATOMIC_XCHGU(xchgwu, xchgw) ++ATOMIC_XCHGU(xchgalwu, xchgalw) + -+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, -+ vmIntrinsics::ID iid, -+ Register receiver_reg, -+ Register member_reg, -+ bool for_compiler_entry) { -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ // temps used in this code are not used in *either* compiled or interpreted calling sequences -+ Register temp1 = x7; -+ Register temp2 = x28; -+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP -+ if (for_compiler_entry) { -+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); -+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++#undef ATOMIC_XCHGU ++ ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. 
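++ // (Editorial note: auipc reaches roughly pc +/- 2 GiB via its signed 20-bit
++ // upper immediate and the paired jalr supplies the low signed 12 bits, so a
++ // sub-2Gb code cache keeps every in-cache target within range of this pair.)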
++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x0, tmp, offset); ++ } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ j(entry); + } ++} + -+ assert_different_registers(temp1, temp2, temp3, receiver_reg); -+ assert_different_registers(temp1, temp2, temp3, member_reg); ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x1, tmp, offset); // link ++ } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jal(entry); // link ++ } ++} + -+ if (iid == vmIntrinsics::_invokeBasic) { -+ // indirect through MH.form.vmentry.vmtarget -+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ bool must_load_sco = (super_check_offset == noreg); ++ if (must_load_sco) { ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { -+ // The method is a member invoker used by direct method handles. -+ if (VerifyMethodHandles) { -+ // make sure the trailing argument really is a MemberName (caller responsibility) -+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), -+ "MemberName required for invokeVirtual etc."); -+ } ++ assert_different_registers(sub_klass, super_klass, super_check_offset); ++ } + -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in batch"); + -+ Register temp1_recv_klass = temp1; -+ if (iid != vmIntrinsics::_linkToStatic) { -+ __ verify_oop(receiver_reg); -+ if (iid == vmIntrinsics::_linkToSpecial) { -+ // Don't actually load the klass; just null-check the receiver. -+ __ null_check(receiver_reg); -+ } else { -+ // load receiver klass itself -+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ BLOCK_COMMENT("check_receiver {"); -+ // The receiver for the MemberName must be in receiver_reg. -+ // Check the receiver against the MemberName.clazz -+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { -+ // Did not load it above... 
-+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { -+ Label L_ok; -+ Register temp2_defc = temp2; -+ __ load_heap_oop(temp2_defc, member_clazz, temp3); -+ load_klass_from_Class(_masm, temp2_defc); -+ __ verify_klass_ptr(temp2_defc); -+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); -+ // If we get here, the type check failed! -+ __ ebreak(); -+ __ bind(L_ok); -+ } -+ BLOCK_COMMENT("} check_receiver"); -+ } -+ if (iid == vmIntrinsics::_linkToSpecial || -+ iid == vmIntrinsics::_linkToStatic) { -+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass -+ } ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); + -+ // Live registers at this point: -+ // member_reg - MemberName that was the trailing argument -+ // temp1_recv_klass - klass of stacked receiver, if needed -+ // x30 - interpreter linkage (if interpreted) -+ // x11 ... x10 - compiler arguments (if compiled) ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else j(label) /*omit semi*/ + -+ Label L_incompatible_class_change_error; -+ switch (iid) { -+ case vmIntrinsics::_linkToSpecial: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front fo the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); + -+ case vmIntrinsics::_linkToStatic: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(tmp_reg, super_check_offset_addr); ++ super_check_offset = tmp_reg; ++ } ++ add(t0, sub_klass, super_check_offset); ++ Address super_check_addr(t0); ++ ld(t0, super_check_addr); // load displayed supertype + -+ case vmIntrinsics::_linkToVirtual: { -+ // same as TemplateTable::invokevirtual, -+ // minus the CP setup and profiling: ++ // Ths check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_Cache and the primary super dispaly elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. 
++ // Otherwise, it's the slow path for us (no success at this point). + -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); -+ } ++ beq(super_klass, t0, *L_success); ++ mv(t1, sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(super_check_offset, t1, *L_slow_path); ++ } else { ++ bne(super_check_offset, t1, *L_failure, /* is_far */ true); ++ final_jmp(*L_slow_path); ++ } + -+ // pick out the vtable index from the MemberName, and then we can discard it: -+ Register temp2_index = temp2; -+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ bind(L_fallthrough); + -+ if (VerifyMethodHandles) { -+ Label L_index_ok; -+ __ bgez(temp2_index, L_index_ok); -+ __ ebreak(); -+ __ BIND(L_index_ok); -+ } ++#undef final_jmp ++} + -+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget -+ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++// Scans count pointer sized words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scan(Register addr, Register value, Register count, ++ Register tmp) { ++ Label Lloop, Lexit; ++ beqz(count, Lexit); ++ bind(Lloop); ++ ld(tmp, addr); ++ beq(value, tmp, Lexit); ++ add(addr, addr, wordSize); ++ sub(count, count, 1); ++ bnez(count, Lloop); ++ bind(Lexit); ++} + -+ // get target Method* & entry point -+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); -+ break; -+ } ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); ++ if (tmp2_reg != noreg) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); ++ } ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + -+ case vmIntrinsics::_linkToInterface: { -+ // same as TemplateTable::invokeinterface -+ // (minus the CP setup and profiling, with different argument motion) -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); -+ } ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + -+ Register temp3_intf = temp3; -+ __ load_heap_oop(temp3_intf, member_clazz); -+ load_klass_from_Class(_masm, temp3_intf); -+ __ verify_klass_ptr(temp3_intf); ++ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ Register rindex = xmethod; -+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); -+ if (VerifyMethodHandles) { -+ Label L; -+ __ bgez(rindex, L); -+ __ ebreak(); -+ __ bind(L); -+ } ++ // A couple of usefule fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); + -+ // given intf, index, and recv klass, dispatch to the implementation method -+ __ lookup_interface_method(temp1_recv_klass, temp3_intf, -+ // note: next two args must be the same: -+ rindex, xmethod, -+ temp2, -+ L_incompatible_class_change_error); -+ break; -+ } ++ BLOCK_COMMENT("check_klass_subtype_slow_path"); + -+ default: -+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); -+ break; -+ } ++ // Do a linear scan 
of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connecitons with the input regs. + -+ // live at this point: xmethod, x30 (if interpreted) ++ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) ++ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) + -+ // After figuring out which concrete method to call, jump into it. -+ // Note that this works in the interpreter with no data motion. -+ // But the compiled version will require that r2_recv be shifted out. -+ __ verify_method_ptr(xmethod); -+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); -+ if (iid == vmIntrinsics::_linkToInterface) { -+ __ bind(L_incompatible_class_change_error); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ RegSet pushed_registers; ++ if (!IS_A_TEMP(x12)) { ++ pushed_registers += x12; ++ } ++ if (!IS_A_TEMP(x15)) { ++ pushed_registers += x15; ++ } ++ ++ if (super_klass != x10 || UseCompressedOops) { ++ if (!IS_A_TEMP(x10)) { ++ pushed_registers += x10; + } + } + -+} ++ push_reg(pushed_registers, sp); + -+#ifndef PRODUCT -+void trace_method_handle_stub(const char* adaptername, -+ oop mh, -+ intptr_t* saved_regs, -+ intptr_t* entry_sp) { } ++ // Get super_klass value into x10 (even if it was in x15 or x12) ++ mv(x10, super_klass); + -+// The stub wraps the arguments in a struct on the stack to avoid -+// dealing with the different calling conventions for passing 6 -+// arguments. -+struct MethodHandleStubArguments { -+ const char* adaptername; -+ oopDesc* mh; -+ intptr_t* saved_regs; -+ intptr_t* entry_sp; -+}; -+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++#ifndef PRODUCT ++ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); ++ Address pst_counter_addr(t1); ++ ld(t0, pst_counter_addr); ++ add(t0, t0, 1); ++ sd(t0, pst_counter_addr); ++#endif // PRODUCT + -+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } -+#endif //PRODUCT -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -new file mode 100644 -index 000000000..8ed69efe8 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,58 @@ -+/* -+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // We will consult the secondary-super array. ++ ld(x15, secondary_supers_addr); ++ // Load the array length. ++ lwu(x12, Address(x15, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ add(x15, x15, Array::base_offset_in_bytes()); + -+// Platform-specific definitions for method handles. -+// These definitions are inlined into class MethodHandles. ++ // Set t0 to an obvious invalid value, falling through by default ++ li(t0, -1); ++ // Scan X12 words at [X15] for an occurrence of X10. ++ repne_scan(x15, x10, x12, t0); + -+// Adapters -+enum /* platform_dependent_constants */ { -+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) -+}; ++ // pop will restore x10, so we should use a temp register to keep its value ++ mv(t1, x10); + -+public: ++ // Unspill the temp registers: ++ pop_reg(pushed_registers, sp); + -+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ bne(t1, t0, *L_failure); + -+ static void verify_klass(MacroAssembler* _masm, -+ Register obj, SystemDictionary::WKID klass_id, -+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ // Success. Cache the super we found an proceed in triumph. ++ sd(super_klass, super_cache_addr); + -+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), -+ "reference is a MH"); ++ if (L_success != &L_fallthrough) { ++ j(*L_success); + } + -+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++#undef IS_A_TEMP + -+ // Similar to InterpreterMacroAssembler::jump_from_interpreted. -+ // Takes care of special dispatch from single stepping too. -+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry); ++ bind(L_fallthrough); ++} + -+ static void jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -new file mode 100644 -index 000000000..4b1573130 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,404 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "utilities/ostream.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+ -+Register NativeInstruction::extract_rs1(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); +} + -+Register NativeInstruction::extract_rs2(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); +} + -+Register NativeInstruction::extract_rd(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); -+} + -+uint32_t NativeInstruction::extract_opcode(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 6, 0); -+} ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to Thread::current() or, indeed, the call setup code. 
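Editorial note (not part of the patch): the set pushed below is the integer caller-saved ("call-clobbered") set of the RISC-V psABI, minus the register that will receive the result; the register names are standard, nothing here is specific to this patch.

  // RISC-V integer caller-saved registers (psABI):
  //   ra       = x1
  //   t0..t2   = x5..x7
  //   a0..a7   = x10..x17
  //   t3..t6   = x28..x31
  // Callee-saved s0..s11 (x8, x9, x18..x27) and sp/gp/tp are left untouched, so
  // only the result needs the extra mv from x10 when 'thread' is not x10.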
++void MacroAssembler::get_thread(Register thread) { ++ // save all call-clobbered regs except thread ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; ++ push_reg(saved_regs, sp); + -+uint32_t NativeInstruction::extract_funct3(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 14, 12); -+} ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); ++ } + -+bool NativeInstruction::is_pc_relative_at(address instr) { -+ // auipc + jalr -+ // auipc + addi -+ // auipc + load -+ // auipc + fload_load -+ return (is_auipc_at(instr)) && -+ (is_addi_at(instr + instruction_size) || -+ is_jalr_at(instr + instruction_size) || -+ is_load_at(instr + instruction_size) || -+ is_float_load_at(instr + instruction_size)) && -+ check_pc_relative_data_dependency(instr); ++ // restore pushed registers ++ pop_reg(saved_regs, sp); +} + -+// ie:ld(Rd, Label) -+bool NativeInstruction::is_load_pc_relative_at(address instr) { -+ return is_auipc_at(instr) && // auipc -+ is_ld_at(instr + instruction_size) && // ld -+ check_load_pc_relative_data_dependency(instr); ++void MacroAssembler::load_byte_map_base(Register reg) { ++ CardTable::CardValue* byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); +} + -+bool NativeInstruction::is_movptr_at(address instr) { -+ return is_lui_at(instr) && // Lui -+ is_addi_at(instr + instruction_size) && // Addi -+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 -+ is_addi_at(instr + instruction_size * 3) && // Addi -+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 -+ (is_addi_at(instr + instruction_size * 5) || -+ is_jalr_at(instr + instruction_size * 5) || -+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load -+ check_movptr_data_dependency(instr); -+} ++void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { ++ relocInfo::relocType rtype = dest.rspec().reloc()->type(); ++ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); ++ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); ++ unsigned long dest_address = (uintptr_t)dest.target(); ++ long offset_low = dest_address - low_address; ++ long offset_high = dest_address - high_address; + -+bool NativeInstruction::is_li32_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addiw_at(instr + instruction_size) && // addiw -+ check_li32_data_dependency(instr); -+} ++ assert(is_valid_riscv64_address(dest.target()), "bad address"); ++ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+bool NativeInstruction::is_li64_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addi_at(instr + instruction_size) && // addi -+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 3) && // addi -+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 5) && // addi -+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 -+ is_addi_at(instr + instruction_size * 7) && // addi -+ check_li64_data_dependency(instr); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); ++ // RISC-V doesn't compute a 
page-aligned address, in order to partially ++ // compensate for the use of *signed* offsets in its base+disp12 ++ // addressing mode (RISC-V's PC-relative reach remains asymmetric ++ // [-(2G + 2K), 2G - 2k). ++ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { ++ int64_t distance = dest.target() - pc(); ++ auipc(reg1, (int32_t)distance + 0x800); ++ offset = ((int32_t)distance << 20) >> 20; ++ } else { ++ movptr_with_offset(reg1, dest.target(), offset); ++ } +} + -+void NativeCall::verify() { -+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ sub(sp, sp, framesize); ++ sd(fp, Address(sp, framesize - 2 * wordSize)); ++ sd(ra, Address(sp, framesize - wordSize)); ++ if (PreserveFramePointer) { add(fp, sp, framesize); } ++ verify_cross_modify_fence_not_required(); +} + -+address NativeCall::destination() const { -+ address addr = (address)this; -+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); -+ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); -+ -+ // Do we use a trampoline stub for this call? -+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. -+ assert(cb && cb->is_nmethod(), "sanity"); -+ nmethod *nm = (nmethod *)cb; -+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { -+ // Yes we do, so get the destination from the trampoline stub. -+ const address trampoline_stub_addr = destination; -+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); -+ } -+ -+ return destination; ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ ld(fp, Address(sp, framesize - 2 * wordSize)); ++ ld(ra, Address(sp, framesize - wordSize)); ++ add(sp, sp, framesize); +} + -+// Similar to replace_mt_safe, but just changes the destination. The -+// important thing is that free-running threads are able to execute this -+// call instruction at all times. -+// -+// Used in the runtime linkage of calls; see class CompiledIC. -+// -+// Add parameter assert_lock to switch off assertion -+// during code generation, where no patching lock is needed. -+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { -+ assert(!assert_lock || -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), -+ "concurrent code patching"); ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; + -+ ResourceMark rm; -+ address addr_call = addr_at(0); -+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ bltu(sp, t0, no_reserved_zone_enabling); + -+ // Patch the constant in the call's trampoline stub. -+ address trampoline_stub_addr = get_trampoline(); -+ if (trampoline_stub_addr != NULL) { -+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); -+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); -+ } ++ enter(); // RA and FP are live. 
++ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); ++ jalr(x1, t0, offset); ++ leave(); + -+ // Patch the call. -+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { -+ set_destination(dest); -+ } else { -+ assert (trampoline_stub_addr != NULL, "we need a trampoline"); -+ set_destination(trampoline_stub_addr); -+ } ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); ++ jalr(x0, t0, offset); ++ should_not_reach_here(); + -+ ICache::invalidate_range(addr_call, instruction_size); ++ bind(no_reserved_zone_enabling); +} + -+address NativeCall::get_trampoline() { -+ address call_addr = addr_at(0); -+ -+ CodeBlob *code = CodeCache::find_blob(call_addr); -+ assert(code != NULL, "Could not find the containing code blob"); -+ -+ address jal_destination = MacroAssembler::pd_call_destination(call_addr); -+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { -+ return jal_destination; -+ } ++// Move the address of the polling page into dest. ++void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { ++ ld(dest, Address(xthread, JavaThread::polling_page_offset())); ++} + -+ if (code != NULL && code->is_nmethod()) { -+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++// Read the polling page. The address of the polling page must ++// already be in r. ++address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { ++ address mark; ++ { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), rtype); ++ lwu(zr, Address(r, offset)); ++ mark = inst_mark(); + } -+ -+ return NULL; ++ verify_cross_modify_fence_not_required(); ++ return mark; +} + -+// Inserts a native call instruction at a given pc -+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } -+ -+//------------------------------------------------------------------- -+ -+void NativeMovConstReg::verify() { -+ if (!(nativeInstruction_at(instruction_address())->is_movptr() || -+ is_auipc_at(instruction_address()))) { -+ fatal("should be MOVPTR or AUIPC"); ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert (UseCompressedOops, "should only be used for compressed oops"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } ++#endif ++ int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ code_section()->relocate(inst_mark(), rspec); ++ li32(dst, 0xDEADBEEF); ++ zero_extend(dst, dst, 32); +} + -+intptr_t NativeMovConstReg::data() const { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ if (maybe_cpool_ref(instruction_address())) { -+ return *(intptr_t*)addr; -+ } else { -+ return (intptr_t)addr; -+ } ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != 
NULL, "this assembler needs an OopRecorder"); ++ int index = oop_recorder()->find_index(k); ++ assert(!Universe::heap()->is_in(k), "should not be an oop"); ++ ++ InstructionMark im(this); ++ RelocationHolder rspec = metadata_Relocation::spec(index); ++ code_section()->relocate(inst_mark(), rspec); ++ narrowKlass nk = CompressedKlassPointers::encode(k); ++ li32(dst, nk); ++ zero_extend(dst, dst, 32); +} + -+void NativeMovConstReg::set_data(intptr_t x) { -+ if (maybe_cpool_ref(instruction_address())) { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ *(intptr_t*)addr = x; -+ } else { -+ // Store x into the instruction stream. -+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type || ++ entry.rspec().type() == relocInfo::opt_virtual_call_type || ++ entry.rspec().type() == relocInfo::static_call_type || ++ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + -+ // Find and replace the oop/metadata corresponding to this -+ // instruction in oops section. -+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ if(cb != NULL) { -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->output()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full + } + } ++ } ++ ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ jal(entry.target()); + } else { -+ ShouldNotReachHere(); ++ jal(pc()); + } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); +} + -+void NativeMovConstReg::print() { -+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, -+ p2i(instruction_address()), data()); ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ movptr(t1, (address)Universe::non_oop_word()); ++ assert_cond(entry != NULL); ++ return trampoline_call(Address(entry, rh)); +} + -+//------------------------------------------------------------------- ++// Emit a trampoline stub for a call to a target which is too far away. 
++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) + -+int NativeMovRegMem::offset() const { -+ Unimplemented(); -+ return 0; -+} ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ address stub = start_a_stub(NativeInstruction::instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } + -+void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. + -+void NativeMovRegMem::verify() { -+ Unimplemented(); -+} ++ // make sure 4 byte aligned here, so that the destination address would be ++ // 8 byte aligned after 3 intructions ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + -+//-------------------------------------------------------------------------------- ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + ++ insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); + -+void NativeJump::verify() { } ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ Label target; ++ ld(t0, target); // auipc + ld ++ jr(t0); // jalr ++ bind(target); ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); ++ emit_int64((intptr_t)dest); + ++ const address stub_start_addr = addr_at(stub_start_offset); + -+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; +} + ++Address MacroAssembler::add_memory_helper(const Address dst) { ++ switch (dst.getMode()) { ++ case Address::base_plus_offset: ++ // This is the expected mode, although we allow all the other ++ // forms below. ++ return form_address(t1, dst.base(), dst.offset()); ++ default: ++ la(t1, dst); ++ return Address(t1); ++ } ++} + -+address NativeJump::jump_destination() const { -+ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ ld(t0, adr); ++ addi(t0, t0, imm); ++ sd(t0, adr); ++} + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ lwu(t0, adr); ++ addiw(t0, t0, imm); ++ sw(t0, adr); ++} + -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; -+ return dest; -+}; ++void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { ++ assert_different_registers(src1, t0); ++ int32_t offset; ++ la_patchable(t0, src2, offset); ++ ld(t0, Address(t0, offset)); ++ beq(src1, t0, equal); ++} + -+//------------------------------------------------------------------- ++void MacroAssembler::load_method_holder_cld(Register result, Register method) { ++ load_method_holder(result, method); ++ ld(result, Address(result, InstanceKlass::class_loader_data_offset())); ++} + -+address NativeGeneralJump::jump_destination() const { -+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); -+ address dest = (address) move->data(); ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* ++} + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); ++} + -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; -+ return dest; ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + -+//------------------------------------------------------------------- ++#ifdef COMPILER2 ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); ++ ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); ++ ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+bool NativeInstruction::is_safepoint_poll() { -+ return is_lwu_to_zr(address(this)); ++ bind(L_end); +} + -+bool NativeInstruction::is_lwu_to_zr(address instr) { -+ return (extract_opcode(instr) == 0b0000011 && -+ extract_funct3(instr) == 0b110 && -+ extract_rd(instr) == zr); // zr ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); +} + -+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
-+bool NativeInstruction::is_sigill_zombie_not_entrant() { -+ // jvmci -+ return uint_at(0) == 0xffffffff; ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); +} + -+void NativeIllegalInstruction::insert(address code_pos) { -+ assert_cond(code_pos != NULL); -+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); +} + -+//------------------------------------------------------------------- -+ -+// MT-safe inserting of a jump over a jump or a nop (used by -+// nmethod::make_not_entrant_or_zombie) -+ -+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { -+ -+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); ++} + -+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || -+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), -+ "riscv cannot replace non-jump with jump"); -+ -+ // Patch this nmethod atomically. -+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { -+ ptrdiff_t offset = dest - verified_entry; -+ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M -+ -+ uint32_t insn = 0; -+ address pInsn = (address)&insn; -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump -+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) -+ *(unsigned int*)verified_entry = insn; -+ } else { -+ // We use an illegal instruction for marking a method as -+ // not_entrant or zombie. -+ NativeIllegalInstruction::insert(verified_entry); -+ } ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ ICache::invalidate_range(verified_entry, instruction_size); -+} ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { -+ CodeBuffer cb(code_pos, instruction_size); -+ MacroAssembler a(&cb); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+ int32_t offset = 0; -+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli -+ a.jalr(x0, t0, offset); // jalr ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+ ICache::invalidate_range(code_pos, instruction_size); ++ bind(L_first_loop_exit); +} + -+// MT-safe patching of a long jump instruction. -+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { -+ ShouldNotCallThis(); -+} ++/** ++ * Multiply 64 bit by 64 bit first loop. ++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+address NativeCallTrampolineStub::destination(nmethod *nm) const { -+ return ptr_at(data_offset); -+} ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+void NativeCallTrampolineStub::set_destination(address new_destination) { -+ set_ptr_at(data_offset, new_destination); -+ OrderAccess::release(); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+uint32_t NativeMembar::get_kind() { -+ uint32_t insn = uint_at(0); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ uint32_t predecessor = Assembler::extract(insn, 27, 24); -+ uint32_t successor = Assembler::extract(insn, 23, 20); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); -+} ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+void NativeMembar::set_kind(uint32_t order_kind) { -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, 
kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); ++ j(L_first_loop); + -+ uint32_t insn = uint_at(0); -+ address pInsn = (address) &insn; -+ Assembler::patch(pInsn, 27, 24, predecessor); -+ Assembler::patch(pInsn, 23, 20, successor); ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ address membar = addr_at(0); -+ *(unsigned int*) membar = insn; ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); ++ ++ bind(L_first_loop_exit); +} -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -new file mode 100644 -index 000000000..e8a4e0a46 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,561 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. ++ ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. 
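++ * Each iteration of the inner loop consumes two 64-bit words of y and
++ * updates two 64-bit words of z.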
+ * + */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) ++{ ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP -+#define CPU_RISCV_NATIVEINST_RISCV_HPP -+ -+#include "asm/assembler.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/os.hpp" -+ -+// We have interfaces for the following instructions: -+// - NativeInstruction -+// - - NativeCall -+// - - NativeMovConstReg -+// - - NativeMovRegMem -+// - - NativeJump -+// - - NativeGeneralJump -+// - - NativeIllegalInstruction -+// - - NativeCallTrampolineStub -+// - - NativeMembar ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+// The base class for different kinds of native instruction abstractions. -+// Provides the primitive operations to manipulate code relative to this. ++ srliw(jdx, idx, 2); + -+class NativeInstruction { -+ friend class Relocation; -+ friend bool is_NativeCallTrampolineStub_at(address); -+ public: -+ enum { -+ instruction_size = 4 -+ }; ++ bind(L_third_loop); + -+ juint encoding() const { -+ return uint_at(0); -+ } ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ bool is_jal() const { return is_jal_at(addr_at(0)); } -+ bool is_movptr() const { return is_movptr_at(addr_at(0)); } -+ bool is_call() const { return is_call_at(addr_at(0)); } -+ bool is_jump() const { return is_jump_at(addr_at(0)); } ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } -+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } -+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } -+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } -+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } -+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } -+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } -+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } -+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } -+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && 
extract_funct3(instr) == 0b000; } -+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } -+ static bool is_slli_shift_at(address instr, uint32_t shift) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation -+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field -+ } ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+ static Register extract_rs1(address instr); -+ static Register extract_rs2(address instr); -+ static Register extract_rd(address instr); -+ static uint32_t extract_opcode(address instr); -+ static uint32_t extract_funct3(address instr); ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+ // the instruction sequence of movptr is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi/jalr/load -+ static bool check_movptr_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address last_instr = slli2 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(last_instr) == extract_rd(slli2); -+ } ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ // the instruction sequence of li64 is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ static bool check_li64_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address addi3 = slli2 + instruction_size; -+ address slli3 = addi3 + instruction_size; -+ address addi4 = slli3 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(addi4); -+ } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // the instruction sequence of li32 is as below: -+ // lui -+ // addiw -+ static bool check_li32_data_dependency(address instr) { -+ address lui = instr; -+ address addiw = lui + instruction_size; ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+ return extract_rs1(addiw) == extract_rd(lui) && -+ extract_rs1(addiw) == extract_rd(addiw); -+ } ++ mul(tmp, product_hi, yz_idx2); // yz_idx2 * 
product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+ // the instruction sequence of pc-relative is as below: -+ // auipc -+ // jalr/addi/load/float_load -+ static bool check_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ // the instruction sequence of load_label is as below: -+ // auipc -+ // load -+ static bool check_load_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ j(L_third_loop); + -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } ++ bind(L_third_loop_exit); + -+ static bool is_movptr_at(address instr); -+ static bool is_li32_at(address instr); -+ static bool is_li64_at(address instr); -+ static bool is_pc_relative_at(address branch); -+ static bool is_load_pc_relative_at(address branch); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ static bool is_call_at(address instr) { -+ if (is_jal_at(instr) || is_jalr_at(instr)) { -+ return true; -+ } -+ return false; -+ } -+ static bool is_lwu_to_zr(address instr); ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+ inline bool is_nop(); -+ inline bool is_jump_or_nop(); -+ bool is_safepoint_poll(); -+ bool is_sigill_zombie_not_entrant(); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ protected: -+ address addr_at(int offset) const { return address(this) + offset; } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ jint int_at(int offset) const { return *(jint*) addr_at(offset); } -+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ address ptr_at(int offset) const { return *(address*) addr_at(offset); } ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + ++ bind(L_check_1); + -+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } -+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } -+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } -+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+ public: ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ inline friend NativeInstruction* nativeInstruction_at(address addr); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ static bool maybe_cpool_ref(address instr) { -+ return is_auipc_at(instr); -+ } ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, 
Address(t0, 0)); + -+ bool is_membar() { -+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; -+ } -+}; ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+inline NativeInstruction* nativeInstruction_at(address addr) { -+ return (NativeInstruction*)addr; ++ bind(L_post_third_loop_done); +} + -+// The natural type of an RISCV instruction is uint32_t -+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { -+ return (NativeInstruction*)addr; -+} ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+inline NativeCall* nativeCall_at(address addr); -+// The NativeCall is an abstraction for accessing/manipulating native -+// call instructions (used to manipulate inline caches, primitive & -+// DSO calls, etc.). ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+class NativeCall: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = 4, -+ instruction_offset = 0, -+ displacement_offset = 0, -+ return_address_offset = 4 -+ }; ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(return_address_offset); } -+ address return_address() const { return addr_at(return_address_offset); } -+ address destination() const; ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+ void set_destination(address dest) { -+ if (is_jal()) { -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "should be aligned"); -+ assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ return; -+ } -+ ShouldNotReachHere(); -+ } ++ Label L_multiply_64_x_64_loop, L_done; + -+ void verify_alignment() { ; } -+ void verify(); -+ void print(); ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+ // Creation -+ inline friend NativeCall* nativeCall_at(address addr); -+ inline friend NativeCall* nativeCall_before(address return_address); ++ const Register jdx = tmp1; + -+ static bool is_call_before(address return_address) { -+ return is_call_at(return_address - NativeCall::return_address_offset); -+ } ++ if (AvoidUnalignedAccesses) { ++ // Check if x and y are both 8-byte aligned. 
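++    // With an odd number of ints the 64-bit accesses below would only be
++    // 4-byte aligned, so take the 32-bit path unless both xlen and ylen are even.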
++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); ++ ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; ++ ++ blez(jdx, L_third_loop_exit); ++ ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // MT-safe patching of a call instruction. -+ static void insert(address code_pos, address entry); ++ j(L_second_loop_unaligned); ++ } + -+ static void replace_mt_safe(address instr_addr, address code_buffer); ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ // Similar to replace_mt_safe, but just changes the destination. The -+ // important thing is that free-running threads are able to execute -+ // this call instruction at all times. If the call is an immediate BL -+ // instruction we can simply rely on atomicity of 32-bit writes to -+ // make sure other threads will see no intermediate states. ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+ // We cannot rely on locks here, since the free-running threads must run at -+ // full speed. -+ // -+ // Used in the runtime linkage of calls; see class CompiledIC. -+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+ // The parameter assert_lock disables the assertion during code generation. -+ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+ address get_trampoline(); -+}; ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+inline NativeCall* nativeCall_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++ // Second and third (nested) loops. 
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+inline NativeCall* nativeCall_before(address return_address) { -+ assert_cond(return_address != NULL); -+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+// An interface for accessing/manipulating native mov reg, imm instructions. -+// (used to manipulate inlined 64-bit data calls, etc.) -+class NativeMovConstReg: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). -+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). -+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld -+ instruction_offset = 0, -+ displacement_offset = 0 -+ }; ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ // if the instruction at 5 * instruction_size is addi, -+ // it means a lui + addi + slli + addi + slli + addi instruction sequence, -+ // and the next instruction address should be addr_at(6 * instruction_size). 
-+ // However, when the instruction at 5 * instruction_size isn't addi, -+ // the next instruction address should be addr_at(5 * instruction_size) -+ if (nativeInstruction_at(instruction_address())->is_movptr()) { -+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { -+ // Assume: lui, addi, slli, addi, slli, addi -+ return addr_at(movptr_instruction_size); -+ } else { -+ // Assume: lui, addi, slli, addi, slli -+ return addr_at(movptr_with_offset_instruction_size); -+ } -+ } else if (is_load_pc_relative_at(instruction_address())) { -+ // Assume: auipc, ld -+ return addr_at(load_pc_relative_instruction_size); -+ } -+ guarantee(false, "Unknown instruction in NativeMovConstReg"); -+ return NULL; -+ } ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+ intptr_t data() const; -+ void set_data(intptr_t x); ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+ void flush() { -+ if (!maybe_cpool_ref(instruction_address())) { -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } -+ } ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+ void verify(); -+ void print(); ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+ // Creation -+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); -+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); -+}; ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+inline NativeMovConstReg* nativeMovConstReg_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+inline NativeMovConstReg* nativeMovConstReg_before(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); ++ ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_aligned); ++ ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); ++ ++ bind(L_done); +} ++#endif + -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. -+class NativeMovRegMem: public NativeInstruction { -+ public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. 
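++// E.g. (LL): a match mask of 0x0000_8000_0000_0000 gives ctz = 47, which is
++// rounded down to 40, i.e. the matching byte is element 5.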
++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseRVB) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 8 : 16; ++ li(Rd, -step); ++ mv(tmp2, Rs); + -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); ++} + -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } ++} + -+ int offset() const; -+ -+ void set_offset(int x); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } ++ } ++} + -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; + -+ void verify(); -+ void print(); ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+ private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); -+}; ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. 
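++// ptr and cnt must be x28 and x29 respectively, matching the calling
++// convention of the zero_blocks stub (see the assert below).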
++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels(around)); ++ postcond(pc() == badAddress); ++ return NULL; ++ } ++ } else { ++ jal(zero_blocks); ++ } ++ } ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); ++ } ++ bind(l); ++ } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); +} + -+class NativeJump: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = NativeInstruction::instruction_size, -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = NativeInstruction::instruction_size -+ }; -+ -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(instruction_size); } -+ address jump_destination() const; ++#define SmallArraySize (18 * BytesPerLong) + -+ // Creation -+ inline friend NativeJump* nativeJump_at(address address); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. 
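++// Small counts are fully unrolled; larger counts peel cnt % 8 stores and
++// then loop eight stores at a time, clobbering t0 and t1.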
++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+ void verify(); ++ BLOCK_COMMENT("zero_words {"); + -+ // Unit testing stuff -+ static void test() {} ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ } else { ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+ // Insertion of native jump instruction -+ static void insert(address code_pos, address entry); -+ // MT-safe insertion of native jump at verified method entry -+ static void check_verified_entry_alignment(address entry, address verified_entry); -+ static void patch_verified_entry(address entry, address verified_entry, address dest); -+}; ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ li(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); ++ } + -+inline NativeJump* nativeJump_at(address addr) { -+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif -+ return jump; ++ BLOCK_COMMENT("} zero_words"); +} + -+class NativeGeneralJump: public NativeJump { -+public: -+ enum RISCV_specific_constants { -+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr -+ }; ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... ++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + -+ address jump_destination() const; ++ assert_different_registers(base, cnt, value, t0, t1); + -+ static void insert_unconditional(address code_pos, address entry); -+ static void replace_mt_safe(address instr_addr, address code_buffer); -+}; ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+inline NativeGeneralJump* nativeGeneralJump_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); -+ debug_only(jump->verify();) -+ return jump; -+} ++ beqz(cnt, fini); + -+class NativeIllegalInstruction: public NativeInstruction { -+ public: -+ // Insert illegal opcode as specific address -+ static void insert(address code_pos); -+}; ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+inline bool NativeInstruction::is_nop() { -+ uint32_t insn = *(uint32_t*)addr_at(0); -+ return insn == 0x13; ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); ++ } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); ++ ++ bind(fini); +} + -+inline bool NativeInstruction::is_jump_or_nop() { -+ return is_nop() || is_jump(); ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ +} + -+// Call trampoline stubs. -+class NativeCallTrampolineStub : public NativeInstruction { -+ public: ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+ enum RISCV_specific_constants { -+ // Refer to function emit_trampoline_stub. -+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address -+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr -+ }; ++#undef FCVT_SAFE + -+ address destination(nmethod *nm = NULL) const; -+ void set_destination(address new_destination); -+ ptrdiff_t destination_offset() const; -+}; ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ ++} + -+inline bool is_NativeCallTrampolineStub_at(address addr) { -+ // Ensure that the stub is exactly -+ // ld t0, L--->auipc + ld -+ // jr t0 -+ // L: ++FCMP(float, s); ++FCMP(double, d); + -+ // judge inst + register + imm -+ // 1). check the instructions: auipc + ld + jalr -+ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 -+ // 3). 
check if the offset in ld[31:20] equals the data_offset -+ assert_cond(addr != NULL); -+ const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) && -+ (NativeInstruction::extract_rd(addr) == x5) && -+ (NativeInstruction::extract_rd(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && -+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { -+ return true; -+ } -+ return false; -+} ++#undef FCMP + -+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); -+ return (NativeCallTrampolineStub*)addr; -+} ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+class NativeMembar : public NativeInstruction { -+public: -+ uint32_t get_kind(); -+ void set_kind(uint32_t order_kind); -+}; ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); ++ } ++#endif // ASSERT + -+inline NativeMembar *NativeMembar_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); -+ return (NativeMembar*)addr; -+} ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -new file mode 100644 -index 000000000..04a36c1c7 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP -+#define CPU_RISCV_REGISTERMAP_RISCV_HPP -+ -+// machine-dependent implemention for register maps -+ friend class frame; -+ -+ private: -+ // This is the hook for finding a register in an "well-known" location, -+ // such as a register block of a predetermined format. -+ // Since there is none, we just return NULL. -+ // See registerMap_riscv.hpp for an example of grabbing registers -+ // from register save areas of a standard layout. -+ address pd_location(VMReg reg) const {return NULL;} -+ -+ // no PD state to clear or copy: -+ void pd_clear() {} -+ void pd_initialize() {} -+ void pd_initialize_from(const RegisterMap* map) {} -+ -+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -new file mode 100644 -index 000000000..b30c1b107 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "asm/register.hpp" -+#include "interp_masm_riscv.hpp" -+#include "register_riscv.hpp" -+ -+REGISTER_DEFINITION(Register, noreg); -+ -+REGISTER_DEFINITION(Register, x0); -+REGISTER_DEFINITION(Register, x1); -+REGISTER_DEFINITION(Register, x2); -+REGISTER_DEFINITION(Register, x3); -+REGISTER_DEFINITION(Register, x4); -+REGISTER_DEFINITION(Register, x5); -+REGISTER_DEFINITION(Register, x6); -+REGISTER_DEFINITION(Register, x7); -+REGISTER_DEFINITION(Register, x8); -+REGISTER_DEFINITION(Register, x9); -+REGISTER_DEFINITION(Register, x10); -+REGISTER_DEFINITION(Register, x11); -+REGISTER_DEFINITION(Register, x12); -+REGISTER_DEFINITION(Register, x13); -+REGISTER_DEFINITION(Register, x14); -+REGISTER_DEFINITION(Register, x15); -+REGISTER_DEFINITION(Register, x16); -+REGISTER_DEFINITION(Register, x17); -+REGISTER_DEFINITION(Register, x18); -+REGISTER_DEFINITION(Register, x19); -+REGISTER_DEFINITION(Register, x20); -+REGISTER_DEFINITION(Register, x21); -+REGISTER_DEFINITION(Register, x22); -+REGISTER_DEFINITION(Register, x23); -+REGISTER_DEFINITION(Register, x24); -+REGISTER_DEFINITION(Register, x25); -+REGISTER_DEFINITION(Register, x26); -+REGISTER_DEFINITION(Register, x27); -+REGISTER_DEFINITION(Register, x28); -+REGISTER_DEFINITION(Register, x29); -+REGISTER_DEFINITION(Register, x30); -+REGISTER_DEFINITION(Register, x31); -+ -+REGISTER_DEFINITION(FloatRegister, fnoreg); -+ -+REGISTER_DEFINITION(FloatRegister, f0); -+REGISTER_DEFINITION(FloatRegister, f1); -+REGISTER_DEFINITION(FloatRegister, f2); -+REGISTER_DEFINITION(FloatRegister, f3); -+REGISTER_DEFINITION(FloatRegister, f4); -+REGISTER_DEFINITION(FloatRegister, f5); -+REGISTER_DEFINITION(FloatRegister, f6); -+REGISTER_DEFINITION(FloatRegister, f7); -+REGISTER_DEFINITION(FloatRegister, f8); -+REGISTER_DEFINITION(FloatRegister, f9); -+REGISTER_DEFINITION(FloatRegister, f10); -+REGISTER_DEFINITION(FloatRegister, f11); -+REGISTER_DEFINITION(FloatRegister, f12); -+REGISTER_DEFINITION(FloatRegister, f13); -+REGISTER_DEFINITION(FloatRegister, f14); -+REGISTER_DEFINITION(FloatRegister, f15); -+REGISTER_DEFINITION(FloatRegister, f16); -+REGISTER_DEFINITION(FloatRegister, f17); -+REGISTER_DEFINITION(FloatRegister, f18); -+REGISTER_DEFINITION(FloatRegister, f19); -+REGISTER_DEFINITION(FloatRegister, f20); -+REGISTER_DEFINITION(FloatRegister, f21); -+REGISTER_DEFINITION(FloatRegister, f22); -+REGISTER_DEFINITION(FloatRegister, f23); -+REGISTER_DEFINITION(FloatRegister, f24); -+REGISTER_DEFINITION(FloatRegister, f25); -+REGISTER_DEFINITION(FloatRegister, f26); -+REGISTER_DEFINITION(FloatRegister, f27); -+REGISTER_DEFINITION(FloatRegister, f28); -+REGISTER_DEFINITION(FloatRegister, f29); -+REGISTER_DEFINITION(FloatRegister, f30); -+REGISTER_DEFINITION(FloatRegister, f31); -+ -+REGISTER_DEFINITION(VectorRegister, vnoreg); -+ -+REGISTER_DEFINITION(VectorRegister, v0); -+REGISTER_DEFINITION(VectorRegister, v1); -+REGISTER_DEFINITION(VectorRegister, v2); -+REGISTER_DEFINITION(VectorRegister, v3); -+REGISTER_DEFINITION(VectorRegister, v4); -+REGISTER_DEFINITION(VectorRegister, v5); -+REGISTER_DEFINITION(VectorRegister, v6); -+REGISTER_DEFINITION(VectorRegister, v7); -+REGISTER_DEFINITION(VectorRegister, v8); -+REGISTER_DEFINITION(VectorRegister, v9); -+REGISTER_DEFINITION(VectorRegister, v10); -+REGISTER_DEFINITION(VectorRegister, v11); -+REGISTER_DEFINITION(VectorRegister, v12); -+REGISTER_DEFINITION(VectorRegister, v13); 
-+REGISTER_DEFINITION(VectorRegister, v14); -+REGISTER_DEFINITION(VectorRegister, v15); -+REGISTER_DEFINITION(VectorRegister, v16); -+REGISTER_DEFINITION(VectorRegister, v17); -+REGISTER_DEFINITION(VectorRegister, v18); -+REGISTER_DEFINITION(VectorRegister, v19); -+REGISTER_DEFINITION(VectorRegister, v20); -+REGISTER_DEFINITION(VectorRegister, v21); -+REGISTER_DEFINITION(VectorRegister, v22); -+REGISTER_DEFINITION(VectorRegister, v23); -+REGISTER_DEFINITION(VectorRegister, v24); -+REGISTER_DEFINITION(VectorRegister, v25); -+REGISTER_DEFINITION(VectorRegister, v26); -+REGISTER_DEFINITION(VectorRegister, v27); -+REGISTER_DEFINITION(VectorRegister, v28); -+REGISTER_DEFINITION(VectorRegister, v29); -+REGISTER_DEFINITION(VectorRegister, v30); -+REGISTER_DEFINITION(VectorRegister, v31); ++ Label loop; ++ Label entry; + -+REGISTER_DEFINITION(Register, c_rarg0); -+REGISTER_DEFINITION(Register, c_rarg1); -+REGISTER_DEFINITION(Register, c_rarg2); -+REGISTER_DEFINITION(Register, c_rarg3); -+REGISTER_DEFINITION(Register, c_rarg4); -+REGISTER_DEFINITION(Register, c_rarg5); -+REGISTER_DEFINITION(Register, c_rarg6); -+REGISTER_DEFINITION(Register, c_rarg7); ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... ++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+REGISTER_DEFINITION(FloatRegister, c_farg0); -+REGISTER_DEFINITION(FloatRegister, c_farg1); -+REGISTER_DEFINITION(FloatRegister, c_farg2); -+REGISTER_DEFINITION(FloatRegister, c_farg3); -+REGISTER_DEFINITION(FloatRegister, c_farg4); -+REGISTER_DEFINITION(FloatRegister, c_farg5); -+REGISTER_DEFINITION(FloatRegister, c_farg6); -+REGISTER_DEFINITION(FloatRegister, c_farg7); ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+REGISTER_DEFINITION(Register, j_rarg0); -+REGISTER_DEFINITION(Register, j_rarg1); -+REGISTER_DEFINITION(Register, j_rarg2); -+REGISTER_DEFINITION(Register, j_rarg3); -+REGISTER_DEFINITION(Register, j_rarg4); -+REGISTER_DEFINITION(Register, j_rarg5); -+REGISTER_DEFINITION(Register, j_rarg6); -+REGISTER_DEFINITION(Register, j_rarg7); ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); ++} + -+REGISTER_DEFINITION(FloatRegister, j_farg0); -+REGISTER_DEFINITION(FloatRegister, j_farg1); -+REGISTER_DEFINITION(FloatRegister, j_farg2); -+REGISTER_DEFINITION(FloatRegister, j_farg3); -+REGISTER_DEFINITION(FloatRegister, j_farg4); -+REGISTER_DEFINITION(FloatRegister, j_farg5); -+REGISTER_DEFINITION(FloatRegister, j_farg6); -+REGISTER_DEFINITION(FloatRegister, j_farg7); ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseRVB) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; ++ } ++ } + -+REGISTER_DEFINITION(Register, zr); 
-+REGISTER_DEFINITION(Register, gp); -+REGISTER_DEFINITION(Register, tp); -+REGISTER_DEFINITION(Register, xmethod); -+REGISTER_DEFINITION(Register, ra); -+REGISTER_DEFINITION(Register, sp); -+REGISTER_DEFINITION(Register, fp); -+REGISTER_DEFINITION(Register, xheapbase); -+REGISTER_DEFINITION(Register, xcpool); -+REGISTER_DEFINITION(Register, xmonitors); -+REGISTER_DEFINITION(Register, xlocals); -+REGISTER_DEFINITION(Register, xthread); -+REGISTER_DEFINITION(Register, xbcp); -+REGISTER_DEFINITION(Register, xdispatch); -+REGISTER_DEFINITION(Register, esp); ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} + -+REGISTER_DEFINITION(Register, t0); -+REGISTER_DEFINITION(Register, t1); -+REGISTER_DEFINITION(Register, t2); -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -new file mode 100644 -index 000000000..76215ef2a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++void MacroAssembler::zero_extend(Register dst, Register src, int bits) { ++ if (UseRVB) { ++ if (bits == 16) { ++ zext_h(dst, src); ++ return; ++ } else if (bits == 32) { ++ zext_w(dst, src); ++ return; ++ } ++ } + -+#include "precompiled.hpp" -+#include "register_riscv.hpp" ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } ++} + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * -+ RegisterImpl::max_slots_per_register; -+const int ConcreteRegisterImpl::max_fpr = -+ ConcreteRegisterImpl::max_gpr + -+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseRVB) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; ++ } ++ } + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } ++} + ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; ++ } ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; ++ } + -+const char* RegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", -+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", -+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", -+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + -+const char* FloatRegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", -+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", -+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", -+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; ++void MacroAssembler::safepoint_ifence() { ++ ifence(); ++#ifndef PRODUCT ++ if (VerifyCrossModifyFence) { ++ // Clear the thread state. ++ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ } ++#endif +} + -+const char* VectorRegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", -+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", -+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", -+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; ++#ifndef PRODUCT ++void MacroAssembler::verify_cross_modify_fence_not_required() { ++ if (VerifyCrossModifyFence) { ++ // Check if thread needs a cross modify fence. 
++ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ Label fence_not_required; ++ beqz(t0, fence_not_required); ++ // If it does then fail. ++ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); ++ mv(c_rarg0, xthread); ++ jalr(t0); ++ bind(fence_not_required); ++ } +} -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp ++#endif +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp new file mode 100644 -index 000000000..8beba6776 +index 00000000000..23e09475be1 --- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,337 @@ ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,858 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27087,22119 +26571,23371 @@ index 000000000..8beba6776 + * + */ + -+#ifndef CPU_RISCV_REGISTER_RISCV_HPP -+#define CPU_RISCV_REGISTER_RISCV_HPP -+ -+#include "asm/register.hpp" -+ -+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. -+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. -+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). -+#define CSR_VSTART 0x008 // Vector start position -+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag -+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode -+#define CSR_VCSR 0x00F // Vector control and status register -+#define CSR_VL 0xC20 // Vector length -+#define CSR_VTYPE 0xC21 // Vector data type register -+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) -+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. -+#define CSR_TIME 0xc01 // Timer for RDTIME instruction. -+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+class VMRegImpl; -+typedef VMRegImpl* VMReg; ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" ++#include "oops/compressedOops.hpp" ++#include "utilities/powerOfTwo.hpp" + -+// Use Register as shortcut -+class RegisterImpl; -+typedef RegisterImpl* Register; ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. 
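++// (For example, the mv(Rd, imm) overloads declared below forward to li(), which
++// is expected to pick the shortest instruction sequence for the given constant
++// rather than a fixed-length load; that is the kind of 'better' sequence meant here.)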
+ -+inline Register as_Register(int encoding) { -+ return (Register)(intptr_t) encoding; -+} ++class MacroAssembler: public Assembler { + -+class RegisterImpl: public AbstractRegisterImpl { + public: -+ enum { -+ number_of_registers = 32, -+ number_of_byte_registers = 32, -+ max_slots_per_register = 2 -+ }; -+ -+ // derived registers, offsets, and addresses -+ Register successor() const { return as_Register(encoding() + 1); } ++ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ } ++ virtual ~MacroAssembler() {} + -+ // construction -+ inline friend Register as_Register(int encoding); ++ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); + -+ VMReg as_VMReg(); ++ // Place a fence.i after code may have been modified due to a safepoint. ++ void safepoint_ifence(); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } -+ const char* name() const; -+ int encoding_nocheck() const { return (intptr_t)this; } ++ // Alignment ++ void align(int modulus, int extra_offset = 0); + -+ // Return the bit which represents this register. This is intended -+ // to be ORed into a bitmask: for usage see class RegSet below. -+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } -+}; ++ // Stack frame creation/removal ++ // Note that SP must be updated to the right place before saving/restoring RA and FP ++ // because signal based thread suspend/resume could happen asynchronously. ++ void enter() { ++ addi(sp, sp, - 2 * wordSize); ++ sd(ra, Address(sp, wordSize)); ++ sd(fp, Address(sp)); ++ addi(fp, sp, 2 * wordSize); ++ } + -+// The integer registers of the riscv architecture ++ void leave() { ++ addi(sp, fp, - 2 * wordSize); ++ ld(fp, Address(sp)); ++ ld(ra, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++ } + -+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); -+CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); -+CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); -+CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); -+CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); -+CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); -+CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); -+CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); -+CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); -+CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); -+CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); -+CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); -+CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); -+CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); -+CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); -+CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); -+CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); -+CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); -+CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); -+CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); -+CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); -+CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); -+CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); -+CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); -+CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); -+CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); 
-+CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); -+CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); -+CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); -+CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); -+CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); -+CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); + -+// Use FloatRegister as shortcut -+class FloatRegisterImpl; -+typedef FloatRegisterImpl* FloatRegister; ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + -+inline FloatRegister as_FloatRegister(int encoding) { -+ return (FloatRegister)(intptr_t) encoding; -+} ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+// The implementation of floating point registers for the architecture -+class FloatRegisterImpl: public AbstractRegisterImpl { -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2 -+ }; ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+ // construction -+ inline friend FloatRegister as_FloatRegister(int encoding); ++ void get_vm_result(Register oop_result, Register java_thread); ++ void get_vm_result_2(Register metadata_result, Register java_thread); + -+ VMReg as_VMReg(); ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1, Register arg_2); + -+ // derived registers, offsets, and addresses -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ // These always tightly bind to MacroAssembler::call_VM_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point, Register arg_0); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); ++ void 
super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ const char* name() const; ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + -+}; ++ // thread in the default location (xthread) ++ void reset_last_Java_frame(bool clear_fp); + -+// The float registers of the RISCV architecture ++ void call_native(address entry_point, ++ Register arg_0); ++ void call_native_base( ++ address entry_point, // the entry point ++ Label* retaddr = NULL ++ ); + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label* retaddr = NULL ++ ); + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label& retaddr) { ++ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ } + -+// Use VectorRegister as shortcut -+class 
VectorRegisterImpl; -+typedef VectorRegisterImpl* VectorRegister; ++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); + -+inline VectorRegister as_VectorRegister(int encoding) { -+ return (VectorRegister)(intptr_t) encoding; -+} ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + -+// The implementation of vector registers for riscv-v -+class VectorRegisterImpl: public AbstractRegisterImpl { -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 4 -+ }; ++ virtual void check_and_handle_earlyret(Register java_thread); ++ virtual void check_and_handle_popframe(Register java_thread); + -+ // construction -+ inline friend VectorRegister as_VectorRegister(int encoding); ++ void resolve_weak_handle(Register result, Register tmp); ++ void resolve_oop_handle(Register result, Register tmp = x15); ++ void resolve_jobject(Register value, Register thread, Register tmp); + -+ VMReg as_VMReg(); ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ void mov_metadata(Register dst, Metadata* obj); ++ void bang_stack_size(Register size, Register tmp); ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_klass(Register dst, Klass* k); + -+ // derived registers, offsets, and addresses -+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } ++ void load_mirror(Register dst, Register method, Register tmp = x15); ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, ++ Address src, Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, ++ Register src, Register tmp1, Register thread_tmp); ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ const char* name() const; ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop(Register d, Register s); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop(Register d, Register s); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators 
= 0);
++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
++ Register thread_tmp = noreg, DecoratorSet decorators = 0);
+
++ void store_klass_gap(Register dst, Register src);
+
++ // currently unimplemented
++ // Used for storing NULL. All other oop constants should be
++ // stored using routines that take a jobject.
++ void store_heap_oop_null(Address dst);
+
++ // This dummy is to prevent a call to store_heap_oop from
++ // converting a zero (like NULL) into a Register by giving
++ // the compiler two choices it can't resolve
+
++ void store_heap_oop(Address dst, void* dummy);
+
++ // Support for NULL-checks
++ //
++ // Generates code that causes a NULL OS exception if the content of reg is NULL.
++ // If the accessed location is M[reg + offset] and the offset is known, provide the
++ // offset. No explicit code generation is needed if the offset is within a certain
++ // range (0 <= offset <= page_size).
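++ //
++ // For example, a field access at a small, statically known offset can rely on
++ // the hardware trap taken when reg is NULL (see uses_implicit_null_check below),
++ // while an access whose offset may lie beyond the protected page has to be
++ // preceded by an explicit compare-and-branch of reg against zero
++ // (see needs_explicit_null_check below).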
+
++ virtual void null_check(Register reg, int offset = -1);
++ static bool needs_explicit_null_check(intptr_t offset);
++ static bool uses_implicit_null_check(void* address);
+
++ // idiv variant which deals with MINLONG as dividend and -1 as divisor
++ int corrected_idivl(Register result, Register rs1, Register rs2,
++ bool want_remainder);
++ int corrected_idivq(Register result, Register rs1, Register rs2,
++ bool want_remainder);
+
++ // interface method calling
++ void lookup_interface_method(Register recv_klass,
++ Register intf_klass,
++ RegisterOrConstant itable_index,
++ Register method_result,
++ Register scan_tmp,
++ Label& no_such_interface,
++ bool return_method = true);
+
++ // virtual method calling
++ // n.b. x86 allows RegisterOrConstant for vtable_index
++ void lookup_virtual_method(Register recv_klass,
++ RegisterOrConstant vtable_index,
++ Register method_result);
+
++ // Form an address from base + offset in Rd. Rd may or may not
++ // actually be used: you must use the Address that is returned. It
++ // is up to you to ensure that the shift provided matches the size
++ // of your data.
++ Address form_address(Register Rd, Register base, long byte_offset);
+
++ // allocation
++ void tlab_allocate(
++ Register obj, // result: pointer to object after successful allocation
++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
++ int con_size_in_bytes, // object size in bytes if known at compile time
++ Register tmp1, // temp register
++ Register tmp2, // temp register
++ Label& slow_case, // continuation point if fast allocation fails
++ bool is_far = false
++ );
+
++ void eden_allocate(
++ Register obj, // result: pointer to object after successful allocation
++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
++ int con_size_in_bytes, // object size in bytes if known at compile time
++ Register tmp, // temp register
++ Label& slow_case, // continuation point if fast allocation fails
++ bool is_far = false
++ );
+
++ // Test sub_klass against super_klass, with fast and slow paths.
+
++ // The fast path produces a tri-state answer: yes / no / maybe-slow.
++ // One of the three labels can be NULL, meaning take the fall-through.
++ // If super_check_offset is -1, the value is loaded up from super_klass.
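++ // (check_klass_subtype below is the usual way to combine the two paths: it is
++ // expected to run the fast path with L_slow_path left as NULL so that the
++ // "maybe" outcome falls through into check_klass_subtype_slow_path.)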
++ // No registers are killed, except tmp_reg ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset = noreg); + -+ RegSet &operator+=(const RegSet aSet) { -+ *this = *this + aSet; -+ return *this; -+ } ++ // The reset of the type cehck; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. ++ // Updates the sub's secondary super cache as necessary. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure); + -+ static RegSet of(Register r1) { -+ return RegSet(r1); -+ } ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success); + -+ static RegSet of(Register r1, Register r2) { -+ return of(r1) + r2; -+ } ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + -+ static RegSet of(Register r1, Register r2, Register r3) { -+ return of(r1, r2) + r3; -+ } ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + -+ static RegSet of(Register r1, Register r2, Register r3, Register r4) { -+ return of(r1, r2, r3) + r4; -+ } ++ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} ++ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} + -+ static RegSet range(Register start, Register end) { -+ uint32_t bits = ~0; -+ bits <<= start->encoding(); -+ bits <<= (31 - end->encoding()); -+ bits >>= (31 - end->encoding()); ++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + -+ return RegSet(bits); -+ } ++ // A more convenient access to fence for our purposes ++ // We used four bit to indicate the read and write bits in the predecessors and successors, ++ // and extended i for r, o for w if UseConservativeFence enabled. ++ enum Membar_mask_bits { ++ StoreStore = 0b0101, // (pred = ow + succ = ow) ++ LoadStore = 0b1001, // (pred = ir + succ = ow) ++ StoreLoad = 0b0110, // (pred = ow + succ = ir) ++ LoadLoad = 0b1010, // (pred = ir + succ = ir) ++ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) ++ }; + -+ uint32_t bits() const { return _bitset; } -+}; ++ void membar(uint32_t order_constraint); + -+#endif // CPU_RISCV_REGISTER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -new file mode 100644 -index 000000000..f49fd6439 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { ++ predecessor = (order_constraint >> 2) & 0x3; ++ successor = order_constraint & 0x3; + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/relocInfo.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/safepoint.hpp" ++ // extend rw -> iorw: ++ // 01(w) -> 0101(ow) ++ // 10(r) -> 1010(ir) ++ // 11(rw)-> 1111(iorw) ++ if (UseConservativeFence) { ++ predecessor |= predecessor << 2; ++ successor |= successor << 2; ++ } ++ } + -+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { -+ if (verify_only) { -+ return; ++ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { ++ return ((predecessor & 0x3) << 2) | (successor & 0x3); + } + -+ int bytes; ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); + -+ switch(type()) { -+ case relocInfo::oop_type: { -+ oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when immediate == false -+ if (NativeInstruction::is_load_pc_relative_at(addr())) { -+ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -+ assert(*(address*)constptr == x, "error in oop relocation"); -+ } else { -+ bytes = MacroAssembler::patch_oop(addr(), x); -+ } -+ break; -+ } -+ default: -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); -+ break; -+ } -+ ICache::invalidate_range(addr(), bytes); -+} ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+address Relocation::pd_call_destination(address orig_addr) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ return nativeCallTrampolineStub_at(trampoline)->destination(); -+ } ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ static address target_addr_for_insn(address insn_addr); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
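++ // As an illustration of the contract: pd_patch_instruction_size is expected to
++ // rewrite the instruction sequence at 'branch' so that it reaches 'target' and
++ // to return the number of bytes it modified, which callers use to invalidate
++ // the ICache over exactly the patched range.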
++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { ++ pd_patch_instruction_size(branch, target); + } -+ if (orig_addr != NULL) { -+ // the extracted address from the instructions in address orig_addr -+ address new_addr = MacroAssembler::pd_call_destination(orig_addr); -+ // If call is branch to self, don't try to relocate it, just leave it -+ // as branch to self. This happens during code generation if the code -+ // buffer expands. It will be relocated to the trampoline above once -+ // code generation is complete. -+ new_addr = (new_addr == orig_addr) ? addr() : new_addr; -+ return new_addr; ++ static address pd_call_destination(address branch) { ++ return target_addr_for_insn(branch); + } -+ return MacroAssembler::pd_call_destination(addr()); -+} + -+void Relocation::pd_set_call_destination(address x) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); -+ return; -+ } -+ } -+ MacroAssembler::pd_patch_instruction_size(addr(), x); -+ address pd_call = pd_call_destination(addr()); -+ assert(pd_call == x, "fail in reloc"); -+} ++ static int patch_oop(address insn_addr, address o); ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); + -+address* Relocation::pd_address_in_code() { -+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); -+ return (address*)(MacroAssembler::target_addr_for_insn(addr())); -+} ++ // The following 4 methods return the offset of the appropriate move instruction + -+address Relocation::pd_get_address_from_code() { -+ return MacroAssembler::pd_call_destination(addr()); -+} ++ // Support for fast byte/short loading with zero extension (depending on particular CPU) ++ int load_unsigned_byte(Register dst, Address src); ++ int load_unsigned_short(Register dst, Address src); + -+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+ if (NativeInstruction::maybe_cpool_ref(addr())) { -+ address old_addr = old_addr_for(addr(), src, dest); -+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); -+ } -+} ++ // Support for fast byte/short loading with sign extension (depending on particular CPU) ++ int load_signed_byte(Register dst, Address src); ++ int load_signed_short(Register dst, Address src); + -+void metadata_Relocation::pd_fix_value(address x) { -+} -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -new file mode 100644 -index 000000000..c30150e0a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP -+#define CPU_RISCV_RELOCINFO_RISCV_HPP -+ -+ // machine-dependent parts of class relocInfo -+ private: -+ enum { -+ // Relocations are byte-aligned. -+ offset_unit = 1, -+ // We don't use format(). -+ format_width = 0 -+ }; ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: ++ // Standard pseudoinstruction ++ void nop(); ++ void mv(Register Rd, Register Rs); ++ void notr(Register Rd, Register Rs); ++ void neg(Register Rd, Register Rs); ++ void negw(Register Rd, Register Rs); ++ void sext_w(Register Rd, Register Rs); ++ void zext_b(Register Rd, Register Rs); ++ void seqz(Register Rd, Register Rs); // set if = zero ++ void snez(Register Rd, Register Rs); // set if != zero ++ void sltz(Register Rd, Register Rs); // set if < zero ++ void sgtz(Register Rd, Register Rs); // set if > zero + -+ // This platform has no oops in the code that are not also -+ // listed in the oop section. -+ static bool mustIterateImmediateOopsInCode() { return false; } ++ // Float pseudoinstruction ++ void fmv_s(FloatRegister Rd, FloatRegister Rs); ++ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value ++ void fneg_s(FloatRegister Rd, FloatRegister Rs); + -+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -new file mode 100644 -index 000000000..137e9b7c7 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10685 @@ -+// -+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++ // Double pseudoinstruction ++ void fmv_d(FloatRegister Rd, FloatRegister Rs); ++ void fabs_d(FloatRegister Rd, FloatRegister Rs); ++ void fneg_d(FloatRegister Rd, FloatRegister Rs); + -+// RISCV Architecture Description File ++ // Pseudoinstruction for control and status register ++ void rdinstret(Register Rd); // read instruction-retired counter ++ void rdcycle(Register Rd); // read cycle counter ++ void rdtime(Register Rd); // read time ++ void csrr(Register Rd, unsigned csr); // read csr ++ void csrw(unsigned csr, Register Rs); // write csr ++ void csrs(unsigned csr, Register Rs); // set bits in csr ++ void csrc(unsigned csr, Register Rs); // clear bits in csr ++ void csrwi(unsigned csr, unsigned imm); ++ void csrsi(unsigned csr, unsigned imm); ++ void csrci(unsigned csr, unsigned imm); ++ void frcsr(Register Rd); // read float-point csr ++ void fscsr(Register Rd, Register Rs); // swap float-point csr ++ void fscsr(Register Rs); // write float-point csr ++ void frrm(Register Rd); // read float-point rounding mode ++ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode ++ void fsrm(Register Rs); // write float-point rounding mode ++ void fsrmi(Register Rd, unsigned imm); ++ void fsrmi(unsigned imm); ++ void frflags(Register Rd); // read float-point exception flags ++ void fsflags(Register Rd, Register Rs); // swap float-point exception flags ++ void fsflags(Register Rs); // write float-point exception flags ++ void fsflagsi(Register Rd, unsigned imm); ++ void fsflagsi(unsigned imm); + -+//----------REGISTER DEFINITION BLOCK------------------------------------------ -+// This information is used by the matcher and the register allocator to -+// describe individual registers and classes of registers within the target -+// archtecture. 
++ void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); ++ void blez(Register Rs, const address &dest); ++ void bgez(Register Rs, const address &dest); ++ void bltz(Register Rs, const address &dest); ++ void bgtz(Register Rs, const address &dest); ++ void la(Register Rd, Label &label); ++ void la(Register Rd, const address &dest); ++ void la(Register Rd, const Address &adr); ++ //label ++ void beqz(Register Rs, Label &l, bool is_far = false); ++ void bnez(Register Rs, Label &l, bool is_far = false); ++ void blez(Register Rs, Label &l, bool is_far = false); ++ void bgez(Register Rs, Label &l, bool is_far = false); ++ void bltz(Register Rs, Label &l, bool is_far = false); ++ void bgtz(Register Rs, Label &l, bool is_far = false); ++ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + -+register %{ -+//----------Architecture Description Register Definitions---------------------- -+// General Registers -+// "reg_def" name ( register save type, C convention save type, -+// ideal register type, encoding ); -+// Register Save Types: -+// -+// NS = No-Save: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, & -+// that they do not need to be saved at call sites. -+// -+// SOC = Save-On-Call: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, -+// but that they must be saved at call sites. -+// -+// SOE = Save-On-Entry: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, but they do not need to be saved at call -+// sites. -+// -+// AS = Always-Save: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, & that they must be saved at call sites. -+// -+// Ideal Register Type is used to determine how to save & restore a -+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -+// -+// The encoding number is the actual bit-pattern placed into the opcodes. 
++ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } ++ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } ++ void push_reg(Register Rs); ++ void pop_reg(Register Rd); ++ int push_reg(unsigned int bitset, Register stack); ++ int pop_reg(unsigned int bitset, Register stack); ++ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } ++ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } ++#ifdef COMPILER2 ++ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } ++ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } ++#endif // COMPILER2 + -+// We must define the 64 bit int registers in two 32 bit halves, the -+// real lower register and a virtual upper half register. upper halves -+// are used by the register allocator but are not actually supplied as -+// operands to memory ops. -+// -+// follow the C1 compiler in making registers -+// -+// x7, x9-x17, x28-x31 volatile (caller save) -+// x0-x4, x8, x27 system (no save, no allocate) -+// x5-x6 non-allocatable (so we can use them as temporary regs) ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. ++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); + -+// -+// as regards Java usage. we don't use any callee save registers -+// because this makes it difficult to de-optimise a frame (see comment -+// in x86 implementation of Deoptimization::unwind_callee_save_values) -+// ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } + -+// General Registers ++ void pusha(); ++ void popa(); ++ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); ++ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); + -+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr -+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); -+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp -+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); -+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp -+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); -+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp -+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); -+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); -+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); -+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp -+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); -+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); -+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); -+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); -+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); -+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); -+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); -+reg_def R12 ( SOC, SOC, Op_RegI, 
12, x12->as_VMReg() ); -+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); -+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); -+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); -+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); -+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); -+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); -+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); -+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); -+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); -+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); -+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); -+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); -+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); -+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); -+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); -+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp -+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); -+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); -+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); -+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); -+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); -+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread -+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); -+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); -+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); -+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); -+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); -+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); -+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); -+reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase -+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); -+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); -+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); -+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); -+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); -+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); -+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); -+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); -+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); + -+// ---------------------------- -+// Float/Double Registers -+// ---------------------------- ++ void bind(Label& L) { ++ Assembler::bind(L); ++ // fences across basic blocks should not be merged ++ code()->clear_last_insn(); ++ } + -+// Double Registers ++ // mv ++ template::value)> ++ inline void mv(Register Rd, T o) { ++ li(Rd, (int64_t)o); ++ } + -+// The rules of ADL require that double registers be defined in pairs. -+// Each pair must be two 32-bit values, but not necessarily a pair of -+// single float registers. In each pair, ADLC-assigned register numbers -+// must be adjacent, with the lower number even. Finally, when the -+// CPU stores such a register pair to memory, the word associated with -+// the lower ADLC-assigned number must be stored to the lower address. ++ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + -+// RISCV has 32 floating-point registers. 
Each can store a single -+// or double precision floating-point value. ++ void mv(Register Rd, Address dest); ++ void mv(Register Rd, address addr); ++ void mv(Register Rd, RegisterOrConstant src); + -+// for Java use float registers f0-f31 are always save on call whereas -+// the platform ABI treats f8-f9 and f18-f27 as callee save). Other -+// float registers are SOC as per the platform spec ++ // logic ++ void andrw(Register Rd, Register Rs1, Register Rs2); ++ void orrw(Register Rd, Register Rs1, Register Rs2); ++ void xorrw(Register Rd, Register Rs1, Register Rs2); + -+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); -+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); -+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); -+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); -+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); -+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); -+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); -+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); -+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); -+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); -+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); -+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); -+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); -+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); -+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); -+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); -+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); -+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); -+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); -+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); -+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); -+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); -+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); -+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); -+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); -+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); -+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); -+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); -+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); -+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); -+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); -+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); -+reg_def 
F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); -+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); -+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); -+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); -+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); -+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); -+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); -+reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); -+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); -+reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); -+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); -+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); -+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); ++ // revb ++ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend ++ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend ++ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend ++ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend ++ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower ++ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword ++ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word ++ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+// ---------------------------- -+// Vector Registers -+// ---------------------------- ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); ++ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+// for Java use vector registers v0-v31 are always save on call just -+// as the platform ABI treats v0-v31 as caller save. 
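// --- Illustrative sketch (editor's addition, not part of the patch) ---------
// The cmpxchg()/cmpxchg_weak() declarations that follow emit LR/SC loops with
// the requested acquire/release ordering.  This standalone std::atomic
// analogue shows the same contract: install new_val only if the current value
// equals expected, then report either the observed old value or a bool, which
// is what the `result_as_bool` parameter selects.  The function names below
// are invented for the demo; this is not HotSpot code.
#include <atomic>
#include <cstdint>
#include <cstdio>

static int64_t cas_return_old(std::atomic<int64_t>& cell,
                              int64_t expected, int64_t new_val) {
  int64_t old = expected;
  // On failure, `old` is overwritten with the value actually observed,
  // mirroring `result` when result_as_bool == false.
  cell.compare_exchange_strong(old, new_val,
                               std::memory_order_acq_rel,
                               std::memory_order_acquire);
  return old;
}

static bool cas_return_bool(std::atomic<int64_t>& cell,
                            int64_t expected, int64_t new_val) {
  int64_t old = expected;  // result_as_bool == true: only success matters
  return cell.compare_exchange_strong(old, new_val,
                                      std::memory_order_acq_rel,
                                      std::memory_order_acquire);
}

int main() {
  std::atomic<int64_t> cell{42};
  std::printf("%lld\n", (long long)cas_return_old(cell, 42, 7));  // 42: swap succeeded
  std::printf("%d\n",   cas_return_bool(cell, 42, 9));            // 0: cell is now 7
  return 0;
}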
++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); ++ void cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool = false); ++ void cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result); ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3); ++ void cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3); ++ void weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3); + -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ void atomic_xchgwu(Register prev, Register newv, Register addr); ++ void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); ++ static bool far_branches() { ++ return ReservedCodeCacheSize > branch_range; ++ } + -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. 
++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); ++ static int far_branch_size() { ++ if (far_branches()) { ++ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++ } else { ++ return 4; ++ } ++ } + -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); ++ void load_byte_map_base(Register reg); + -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); ++ } + -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); ++ void la_patchable(Register reg1, const Address &dest, int32_t &offset); + -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); ++ virtual void _call_Unimplemented(address call_site) { ++ mv(t1, call_site); ++ } + -+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); ++ // Frame creation and destruction shared between JITs. 
++ void build_frame(int framesize); ++ void remove_frame(int framesize); + -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); ++ void reserved_stack_check(); + -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); ++ void get_polling_page(Register dest, relocInfo::relocType rtype); ++ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); -+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); ++ address ic_call(address entry, jint method_index = 0); + -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); ++ void cmpptr(Register src1, Address src2, Label& equal); + -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); ++ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); ++ void load_method_holder_cld(Register result, Register method); ++ void load_method_holder(Register holder, Register method); + -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, ++ Register result, Register char_tmp, Register tmp, ++ bool haystack_isL); ++ void compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2); + -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); ++ void cad(Register dst, Register src1, Register src2, Register carry); ++ void cadc(Register dst, Register src1, Register src2, Register carry); ++ void adc(Register dst, Register src1, Register src2, Register carry); ++ void 
add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry); ++ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi); ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi); ++#endif + -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); ++ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); ++ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); ++ void zero_words(Register base, u_int64_t cnt); ++ address zero_words(Register ptr, Register cnt); ++ void fill_words(Register base, Register cnt, Register value); ++ void zero_memory(Register addr, Register len, Register tmp); + -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); -+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); -+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); ++ // shift left by shamt and add ++ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); ++ // Here the float instructions with safe deal with some exceptions. ++ // e.g. convert from NaN, +Inf, -Inf to int, float, double ++ // will trigger exception, we need to deal with these situations ++ // to get correct results. 
++ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); ++ // vector load/store unit-stride instructions ++ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vle64_v(vd, base, vm); ++ break; ++ case Assembler::e32: ++ vle32_v(vd, base, vm); ++ break; ++ case Assembler::e16: ++ vle16_v(vd, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vle8_v(vd, base, vm); ++ break; ++ } ++ } + -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); ++ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vse64_v(store_data, base, vm); ++ break; ++ case Assembler::e32: ++ vse32_v(store_data, base, vm); ++ break; ++ case Assembler::e16: ++ vse16_v(store_data, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vse8_v(store_data, base, vm); ++ break; ++ } ++ } + -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); ++ static const int zero_words_block_size; + -+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); ++ void cast_primitive_type(BasicType type, Register Rt) { ++ switch (type) { ++ case T_BOOLEAN: ++ sltu(Rt, zr, Rt); ++ break; ++ case T_CHAR : ++ zero_extend(Rt, Rt, 16); ++ break; ++ case T_BYTE : ++ sign_extend(Rt, Rt, 8); ++ break; ++ case T_SHORT : ++ sign_extend(Rt, Rt, 16); ++ break; ++ case T_INT : ++ addw(Rt, Rt, zr); ++ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); ++ // float cmp with unordered_result ++ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def 
V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); ++ // Zero/Sign-extend ++ void zero_extend(Register dst, Register src, int bits); ++ void sign_extend(Register dst, Register src, int bits); + -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); ++ // compare src1 and src2 and get -1/0/1 in dst. ++ // if [src1 > src2], dst = 1; ++ // if [src1 == src2], dst = 0; ++ // if [src1 < src2], dst = -1; ++ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+// ---------------------------- -+// Special Registers -+// ---------------------------- ++ int push_vp(unsigned int bitset, Register stack); ++ int pop_vp(unsigned int bitset, Register stack); + -+// On riscv, the physical flag register is missing, so we use t1 instead, -+// to bridge the RegFlag semantics in share/opto ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + -+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); ++private: + -+// Specify priority of register selection within phases of register -+// allocation. Highest priority is first. A useful heuristic is to -+// give registers a low priority when they are required by machine -+// instructions, like EAX and EDX on I486, and choose no-save registers -+// before save-on-call, & save-on-call before save-on-entry. Registers -+// which participate in fixed calling sequences should come last. -+// Registers which are used as pairs must fall on an even boundary. ++#ifdef ASSERT ++ // Template short-hand support to clean-up after a failed call to trampoline ++ // call generation (see trampoline_call() below), when a set of Labels must ++ // be reset (before returning). ++ template ++ void reset_labels(Label& lbl, More&... more) { ++ lbl.reset(); reset_labels(more...); ++ } ++ template ++ void reset_labels(Label& lbl) { ++ lbl.reset(); ++ } ++#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+alloc_class chunk0( -+ // volatiles -+ R7, R7_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H, ++ // Return true if an address is within the 48-bit RISCV64 address space. 
++ bool is_valid_riscv64_address(address addr) { ++ return ((uintptr_t)addr >> 48) == 0; ++ } + -+ // arg registers -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, ++ void ld_constant(Register dest, const Address &const_addr) { ++ if (NearCpool) { ++ ld(dest, const_addr); ++ } else { ++ int32_t offset = 0; ++ la_patchable(dest, InternalAddress(const_addr.target()), offset); ++ ld(dest, Address(dest, offset)); ++ } ++ } + -+ // non-volatiles -+ R9, R9_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, ++ int bitset_to_regs(unsigned int bitset, unsigned char* regs); ++ Address add_memory_helper(const Address dst); + -+ // non-allocatable registers -+ R23, R23_H, // java thread -+ R27, R27_H, // heapbase -+ R4, R4_H, // thread -+ R8, R8_H, // fp -+ R0, R0_H, // zero -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+); ++ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+alloc_class chunk1( ++ // Check the current thread doesn't need a cross modify fence. ++ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; ++}; + -+ // no save -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H, ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif + -+ // arg registers -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. 
++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; + -+ // non-volatiles -+ F8, F8_H, -+ F9, F9_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+); ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 00000000000..ef968ccd96d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+alloc_class chunk3(RFLAGS); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + -+//----------Architecture Description Register Classes-------------------------- -+// Several register classes are automatically defined based upon information in -+// this architecture description. -+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) -+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// ++// Still empty. 
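// --- Illustrative sketch (editor's addition, not part of the patch) ---------
// The ld_constant()/la_patchable() helpers declared earlier in
// macroAssembler_riscv.hpp materialize a pc-relative address as an
// AUIPC-style upper part plus a signed 12-bit low offset.  The arithmetic
// below demonstrates that split for non-negative offsets; the +0x800 rounding
// exists because the low part is sign-extended.  Names are invented for the
// demo and nothing here is HotSpot code.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct HiLo {
  int64_t hi;   // upper-immediate contribution (already shifted left by 12)
  int32_t lo;   // signed 12-bit remainder folded into the following ld/addi
};

static HiLo split_offset(int64_t offset) {
  assert(offset >= 0);                      // keep the demo in well-defined territory
  int64_t hi = ((offset + 0x800) >> 12) << 12;
  int32_t lo = (int32_t)(offset - hi);
  assert(lo >= -2048 && lo <= 2047);        // must fit a signed 12-bit immediate
  return { hi, lo };
}

int main() {
  for (int64_t off : { INT64_C(0x12345678), INT64_C(4095), INT64_C(2048) }) {
    HiLo p = split_offset(off);
    assert(p.hi + p.lo == off);             // the pair reassembles the offset
    std::printf("0x%llx = hi 0x%llx + lo %d\n",
                (unsigned long long)off, (unsigned long long)p.hi, p.lo);
  }
  return 0;
}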
+ -+// Class for all 32 bit general purpose registers -+reg_class all_reg32( -+ R0, -+ R1, -+ R2, -+ R3, -+ R4, -+ R7, -+ R8, -+ R9, -+ R10, -+ R11, -+ R12, -+ R13, -+ R14, -+ R15, -+ R16, -+ R17, -+ R18, -+ R19, -+ R20, -+ R21, -+ R22, -+ R23, -+ R24, -+ R25, -+ R26, -+ R27, -+ R28, -+ R29, -+ R30, -+ R31 -+); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +new file mode 100644 +index 00000000000..23a75d20502 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp +@@ -0,0 +1,169 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// Class for any 32 bit integer registers (excluding zr) -+reg_class any_reg32 %{ -+ return _ANY_REG32_mask; -+%} ++#ifndef CPU_RISCV_MATCHER_RISCV_HPP ++#define CPU_RISCV_MATCHER_RISCV_HPP + -+// Singleton class for R10 int register -+reg_class int_r10_reg(R10); ++ // Defined within class Matcher + -+// Singleton class for R12 int register -+reg_class int_r12_reg(R12); ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; + -+// Singleton class for R13 int register -+reg_class int_r13_reg(R13); ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = true; + -+// Singleton class for R14 int register -+reg_class int_r14_reg(R14); ++ static const bool supports_scalable_vector() { ++ return UseRVV; ++ } + -+// Class for all long integer registers -+reg_class all_reg( -+ R0, R0_H, -+ R1, R1_H, -+ R2, R2_H, -+ R3, R3_H, -+ R4, R4_H, -+ R7, R7_H, -+ R8, R8_H, -+ R9, R9_H, -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R23, R23_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, -+ R27, R27_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H -+); ++ // riscv supports misaligned vectors store/load. ++ static constexpr bool misaligned_vectors_ok() { ++ return true; ++ } + -+// Class for all long integer registers (excluding zr) -+reg_class any_reg %{ -+ return _ANY_REG_mask; -+%} ++ // Whether code generation need accurate ConvI2L types. 
++ static const bool convi2l_type_required = false; + -+// Class for non-allocatable 32 bit registers -+reg_class non_allocatable_reg32( -+ R0, // zr -+ R1, // ra -+ R2, // sp -+ R3, // gp -+ R4, // tp -+ R23 // java thread -+); ++ // Does the CPU require late expand (see block.cpp for description of late expand)? ++ static const bool require_postalloc_expand = false; + -+// Class for non-allocatable 64 bit registers -+reg_class non_allocatable_reg( -+ R0, R0_H, // zr -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+ R4, R4_H, // tp -+ R23, R23_H // java thread -+); ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; + -+reg_class no_special_reg32 %{ -+ return _NO_SPECIAL_REG32_mask; -+%} ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; + -+reg_class no_special_reg %{ -+ return _NO_SPECIAL_REG_mask; -+%} ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } + -+reg_class ptr_reg %{ -+ return _PTR_REG_mask; -+%} -+ -+reg_class no_special_ptr_reg %{ -+ return _NO_SPECIAL_PTR_REG_mask; -+%} -+ -+// Class for 64 bit register r10 -+reg_class r10_reg( -+ R10, R10_H -+); -+ -+// Class for 64 bit register r11 -+reg_class r11_reg( -+ R11, R11_H -+); -+ -+// Class for 64 bit register r12 -+reg_class r12_reg( -+ R12, R12_H -+); -+ -+// Class for 64 bit register r13 -+reg_class r13_reg( -+ R13, R13_H -+); -+ -+// Class for 64 bit register r14 -+reg_class r14_reg( -+ R14, R14_H -+); -+ -+// Class for 64 bit register r15 -+reg_class r15_reg( -+ R15, R15_H -+); ++ // Use conditional move (CMOVL) ++ static constexpr int long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++ } + -+// Class for 64 bit register r16 -+reg_class r16_reg( -+ R16, R16_H -+); ++ static constexpr int float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++ } + -+// Class for method register -+reg_class method_reg( -+ R31, R31_H -+); ++ // This affects two different things: ++ // - how Decode nodes are matched ++ // - how ImplicitNullCheck opportunities are recognized ++ // If true, the matcher will try to remove all Decodes and match them ++ // (as operands) into nodes. NullChecks are not prepared to deal with ++ // Decodes by final_graph_reshaping(). ++ // If false, final_graph_reshaping() forces the decode behind the Cmp ++ // for a NullCheck. The matcher matches the Decode node into a register. ++ // Implicit_null_check optimization moves the Decode along with the ++ // memory operation back up before the NullCheck. ++ static bool narrow_oop_use_complex_address() { ++ return CompressedOops::shift() == 0; ++ } + -+// Class for heapbase register -+reg_class heapbase_reg( -+ R27, R27_H -+); ++ static bool narrow_klass_use_complex_address() { ++ return false; ++ } + -+// Class for java thread register -+reg_class java_thread_reg( -+ R23, R23_H -+); ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return CompressedOops::base() == NULL; ++ } + -+reg_class r28_reg( -+ R28, R28_H -+); ++ static bool const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. 
++ return CompressedKlassPointers::base() == NULL; ++ } + -+reg_class r29_reg( -+ R29, R29_H -+); ++ // Is it better to copy float constants, or load them directly from ++ // memory? Intel can load a float constant from a direct address, ++ // requiring no extra registers. Most RISCs will have to materialize ++ // an address into a register first, so they would do better to copy ++ // the constant from stack. ++ static const bool rematerialize_float_constants = false; + -+reg_class r30_reg( -+ R30, R30_H -+); ++ // If CPU can load and store mis-aligned doubles directly then no ++ // fixup is needed. Else we split the double into 2 integer pieces ++ // and move it piece-by-piece. Only happens when passing doubles into ++ // C code as the Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = true; + -+// Class for zero registesr -+reg_class zr_reg( -+ R0, R0_H -+); ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. ++ static const bool strict_fp_requires_explicit_rounding = false; + -+// Class for thread register -+reg_class thread_reg( -+ R4, R4_H -+); ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } + -+// Class for frame pointer register -+reg_class fp_reg( -+ R8, R8_H -+); ++ // Do ints take an entire long register or just half? ++ // The relevant question is how the int is callee-saved: ++ // the whole long is written but de-opt'ing will have to extract ++ // the relevant 32 bits. ++ static const bool int_in_long = true; + -+// Class for link register -+reg_class lr_reg( -+ R1, R1_H -+); ++ // Does the CPU supports vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return false; ++ } + -+// Class for long sp register -+reg_class sp_reg( -+ R2, R2_H -+); ++ // Does the CPU supports vector variable rotate instructions? ++ static constexpr bool supports_vector_variable_rotates(void) { ++ return false; ++ } + -+// Class for all float registers -+reg_class float_reg( -+ F0, -+ F1, -+ F2, -+ F3, -+ F4, -+ F5, -+ F6, -+ F7, -+ F8, -+ F9, -+ F10, -+ F11, -+ F12, -+ F13, -+ F14, -+ F15, -+ F16, -+ F17, -+ F18, -+ F19, -+ F20, -+ F21, -+ F22, -+ F23, -+ F24, -+ F25, -+ F26, -+ F27, -+ F28, -+ F29, -+ F30, -+ F31 -+); ++ // Does the CPU supports vector constant rotate instructions? ++ static constexpr bool supports_vector_constant_rotates(int shift) { ++ return false; ++ } + -+// Double precision float registers have virtual `high halves' that -+// are needed by the allocator. -+// Class for all double registers -+reg_class double_reg( -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F8, F8_H, -+ F9, F9_H, -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H -+); ++ // Does the CPU supports vector unsigned comparison instructions? 
++ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return false; ++ } + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } + -+// Class for 64 bit register f0 -+reg_class f0_reg( -+ F0, F0_H -+); ++ // true means we have fast l2f convers ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } + -+// Class for 64 bit register f1 -+reg_class f1_reg( -+ F1, F1_H -+); ++ // Implements a variant of EncodeISOArrayNode that encode ASCII only ++ static const bool supports_encode_ascii_array = false; + -+// Class for 64 bit register f2 -+reg_class f2_reg( -+ F2, F2_H -+); ++ // Returns pre-selection estimated size of a vector operation. ++ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { ++ return 0; ++ } + -+// Class for 64 bit register f3 -+reg_class f3_reg( -+ F3, F3_H -+); ++#endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +new file mode 100644 +index 00000000000..1f7c0c87c21 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -0,0 +1,461 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); ++#define __ _masm-> + -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); ++ if (VerifyMethodHandles) { ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ "MH argument is a Class"); ++ } ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); ++} + -+// class for condition codes -+reg_class reg_flags(RFLAGS); -+%} ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //PRODUCT + -+//----------DEFINITION BLOCK--------------------------------------------------- -+// Define name --> value mappings to inform the ADLC of an integer valued name -+// Current support includes integer values in the range [0, 0x7FFFFFFF] -+// Format: -+// int_def ( , ); -+// Generated Code in ad_.hpp -+// #define () -+// // value == -+// Generated code in ad_.cpp adlc_verification() -+// assert( == , "Expect () to equal "); -+// ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message) { ++ assert_cond(_masm != NULL); ++ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); ++ Klass* klass = vmClasses::klass_at(klass_id); ++ Register temp = t1; ++ Register temp2 = t0; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ beqz(obj, L_bad); ++ __ push_reg(RegSet::of(temp, temp2), sp); ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ intptr_t super_check_offset = klass->super_check_offset(); ++ __ ld(temp, Address(temp, super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ __ bind(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ BLOCK_COMMENT("} verify_klass"); ++} + -+// we follow the ppc-aix port in using a simple cost model which ranks -+// register operations as cheap, memory ops as more expensive and -+// branches as most expensive. the first two have a low as well as a -+// normal cost. 
huge cost appears to be a way of saying don't do -+// something ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + -+definitions %{ -+ // The default cost (of a register move instruction). -+ int_def DEFAULT_COST ( 100, 100); -+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, -+ // multi, auipc, nop, logical, move -+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload -+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore -+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call -+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul -+ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi -+ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi -+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv -+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt -+%} ++#endif //ASSERT + ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(method == xmethod, "interpreter calling convention"); ++ Label L_no_such_method; ++ __ beqz(xmethod, L_no_such_method); ++ __ verify_method_ptr(method); + ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. + -+//----------SOURCE BLOCK------------------------------------------------------- -+// This is a block of C++ code which provides values, functions, and -+// definitions necessary in the rest of the architecture description ++ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ __ beqz(t0, run_compiled_code); ++ __ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ __ jr(t0); ++ __ BIND(run_compiled_code); ++ } + -+source_hpp %{ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(t0,Address(method, entry_offset)); ++ __ jr(t0); ++ __ bind(L_no_such_method); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++} + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "opto/addnode.hpp" -+#include "opto/convertnode.hpp" ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. 
++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == xmethod, "required register for loading method"); + -+extern RegMask _ANY_REG32_mask; -+extern RegMask _ANY_REG_mask; -+extern RegMask _PTR_REG_mask; -+extern RegMask _NO_SPECIAL_REG32_mask; -+extern RegMask _NO_SPECIAL_REG_mask; -+extern RegMask _NO_SPECIAL_PTR_REG_mask; ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); + -+class CallStubImpl { ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ Label L; ++ __ ld(t0, __ argument_address(temp2, -1)); ++ __ beq(recv, t0, L); ++ __ ld(x10, __ argument_address(temp2, -1)); ++ __ ebreak(); ++ __ BIND(L); ++ } + -+ //-------------------------------------------------------------- -+ //---< Used for optimization in Compile::shorten_branches >--- -+ //-------------------------------------------------------------- ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} + -+ public: -+ // Size of call trampoline stub. -+ static uint size_call_trampoline() { -+ return 0; // no call trampolines on this platform ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ ebreak(); // empty stubs make SG sick ++ return NULL; + } + -+ // number of relocations needed by a call trampoline stub -+ static uint reloc_call_trampoline() { -+ return 0; // no call trampolines on this platform ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. 
++ if (iid == vmIntrinsics::_linkToNative) { ++ __ ebreak(); ++ return NULL; + } -+}; + -+class HandlerImpl { ++ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // xmethod: Method* ++ // x13: argument locator (parameter slot count, added to sp) ++ // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away ++ Register argp = x13; // argument list ptr, live on error paths ++ Register mh = x11; // MH receiver; dies quickly and is recycled + -+ public: ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); + -+ static int emit_exception_handler(CodeBuffer &cbuf); -+ static int emit_deopt_handler(CodeBuffer& cbuf); ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ static uint size_exception_handler() { -+ return MacroAssembler::far_branch_size(); ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); ++ __ mv(t1, (int) iid); ++ __ beq(t0, t1, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ ebreak(); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); + } + -+ static uint size_deopt_handler() { -+ // count auipc + far branch -+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ // First task: Find out how big the argument list is. ++ Address x13_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(argp, Address(xmethod, Method::const_offset())); ++ __ load_sized_value(argp, ++ Address(argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ x13_first_arg_addr = __ argument_address(argp, -1); ++ } else { ++ DEBUG_ONLY(argp = noreg); + } -+}; + -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(mh, x13_first_arg_addr); ++ DEBUG_ONLY(argp = noreg); ++ } + -+bool is_CAS(int opcode, bool maybe_volatile); ++ // x13_first_arg_addr is live! + -+// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_exclusive(const Node *load); ++ trace_method_handle_interpreter_entry(_masm, iid); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(recv = x12, x13_first_arg_addr); ++ } ++ DEBUG_ONLY(argp = noreg); ++ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now ++ __ pop_reg(xmember); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); ++ } + ++ return entry_point; ++} + -+// predicate using the temp register for decoding klass -+bool maybe_use_tmp_register_decoding_klass(); -+%} + -+source %{ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register temp1 = x7; ++ Register temp2 = x28; ++ Register temp3 = x29; // x30 is live by this point: it contains the sender SP ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ } + -+ // Derived RegMask with conditionally allocatable registers ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); + -+ RegMask _ANY_REG32_mask; -+ RegMask _ANY_REG_mask; -+ RegMask _PTR_REG_mask; -+ RegMask _NO_SPECIAL_REG32_mask; -+ RegMask _NO_SPECIAL_REG_mask; -+ RegMask _NO_SPECIAL_PTR_REG_mask; ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } + -+ void reg_mask_init() { ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. 
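++        // (linkToSpecial dispatches directly, so the receiver's dynamic type is not needed;
++        // with VerifyMethodHandles the klass is loaded further down only for the
++        // MemberName.clazz subtype check.)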
++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ ebreak(); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // x30 - interpreter linkage (if interpreted) ++ // x11 ... x10 - compiler arguments (if compiled) + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } 
-+ } ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ bgez(temp2_index, L_index_ok); ++ __ ebreak(); ++ __ BIND(L_index_ok); ++ } + ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. + -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); ++ break; ++ } + -+ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) -+ // between an object put and the associated card mark when we are using -+ // CMS without conditional card marking ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } + -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; -+ } ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); + -+ // a storestore is unnecesary in all other cases ++ Register rindex = xmethod; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bgez(rindex, L); ++ __ ebreak(); ++ __ bind(L); ++ } + -+ return true; -+} ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rindex, xmethod, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } + -+// is_CAS(int opcode, bool maybe_volatile) -+// -+// return true if opcode is one of the possible CompareAndSwapX -+// values otherwise false. 
-+bool is_CAS(int opcode, bool maybe_volatile) -+{ -+ switch(opcode) { -+ // We handle these -+ case Op_CompareAndSwapI: -+ case Op_CompareAndSwapL: -+ case Op_CompareAndSwapP: -+ case Op_CompareAndSwapN: -+ case Op_CompareAndSwapB: -+ case Op_CompareAndSwapS: -+ case Op_GetAndSetI: -+ case Op_GetAndSetL: -+ case Op_GetAndSetP: -+ case Op_GetAndSetN: -+ case Op_GetAndAddI: -+ case Op_GetAndAddL: -+#if INCLUDE_SHENANDOAHGC -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+#endif -+ return true; -+ case Op_CompareAndExchangeI: -+ case Op_CompareAndExchangeN: -+ case Op_CompareAndExchangeB: -+ case Op_CompareAndExchangeS: -+ case Op_CompareAndExchangeL: -+ case Op_CompareAndExchangeP: -+ case Op_WeakCompareAndSwapB: -+ case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: -+ case Op_WeakCompareAndSwapL: -+ case Op_WeakCompareAndSwapP: -+ case Op_WeakCompareAndSwapN: -+ return maybe_volatile; -+ default: -+ return false; -+ } -+} ++ default: ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ break; ++ } + -+// predicate controlling translation of CAS -+// -+// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_exclusive(const Node *n) -+{ -+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); -+ if (UseBarriersForVolatile) { -+ return false; -+ } ++ // live at this point: xmethod, x30 (if interpreted) + -+ LoadStoreNode* ldst = n->as_LoadStore(); -+ if (n != NULL && is_CAS(n->Opcode(), false)) { -+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); -+ } else { -+ return ldst != NULL && ldst->trailing_membar() != NULL; ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r2_recv be shifted out. ++ __ verify_method_ptr(xmethod); ++ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ } + } -+ // so we can just return true here -+ return true; -+} + -+bool maybe_use_tmp_register_decoding_klass() { -+ return !UseCompressedOops && -+ Universe::narrow_klass_base() != NULL && -+ Universe::narrow_klass_shift() != 0; +} -+#define __ _masm. + -+// advance declarations for helper functions to convert register -+// indices to register objects ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oopDesc* mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { } + -+// the ad file has to provide implementations of certain methods -+// expected by the generic code -+// -+// REQUIRED FUNCTIONALITY ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. 
++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + -+//============================================================================= ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +new file mode 100644 +index 00000000000..f73aba29d67 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// !!!!! Special hack to get all types of calls to specify the byte offset -+// from the start of the call to the point where the return address -+// will point. ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. + -+int MachCallStaticJavaNode::ret_addr_offset() -+{ -+ // call should be a simple jal -+ int off = 4; -+ return off; -+} ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) ++}; + -+int MachCallDynamicJavaNode::ret_addr_offset() -+{ -+ return 28; // movptr, jal -+} ++public: + -+int MachCallRuntimeNode::ret_addr_offset() { -+ // for generated stubs the call will be -+ // jal(addr) -+ // or with far branches -+ // jal(trampoline_stub) -+ // for real runtime callouts it will be five instructions -+ // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -+ // la(t0, RuntimeAddress(addr)) -+ // addi(sp, sp, -2 * wordSize) -+ // sd(zr, Address(sp)) -+ // sd(t1, Address(sp, wordSize)) -+ // jalr(t0) -+ CodeBlob *cb = CodeCache::find_blob(_entry_point); -+ if (cb != NULL) { -+ return 1 * NativeInstruction::instruction_size; -+ } else { -+ return 11 * NativeInstruction::instruction_size; -+ } -+} ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + -+// Indicate if the safepoint node needs the polling page as an input ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + -+// the shared code plants the oop data at the start of the generated -+// code for the safepoint node and that needs ot be at the load -+// instruction itself. 
so we cannot plant a mov of the safepoint poll -+// address followed by a load. setting this to true means the mov is -+// scheduled as a prior instruction. that's better for scheduling -+// anyway. ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } + -+bool SafePointNode::needs_polling_address_input() -+{ -+ return true; -+} ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + -+//============================================================================= ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); + -+#ifndef PRODUCT -+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL); -+ st->print("BREAKPOINT"); -+} ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +new file mode 100644 +index 00000000000..0a05c577860 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -0,0 +1,429 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" +#endif + -+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ MacroAssembler _masm(&cbuf); -+ __ ebreak(); ++Register NativeInstruction::extract_rs1(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); +} + -+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); ++Register NativeInstruction::extract_rs2(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); +} + -+//============================================================================= -+ -+#ifndef PRODUCT -+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { -+ st->print("nop \t# %d bytes pad for loops and calls", _count); -+ } -+#endif -+ -+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -+ MacroAssembler _masm(&cbuf); -+ for (int i = 0; i < _count; i++) { -+ __ nop(); -+ } -+ } ++Register NativeInstruction::extract_rd(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++} + -+ uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * NativeInstruction::instruction_size; -+ } ++uint32_t NativeInstruction::extract_opcode(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++} + -+//============================================================================= -+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; ++uint32_t NativeInstruction::extract_funct3(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++} + -+int Compile::ConstantTable::calculate_table_base_offset() const { -+ return 0; // absolute addressing, no offset ++bool NativeInstruction::is_pc_relative_at(address instr) { ++ // auipc + jalr ++ // auipc + addi ++ // auipc + load ++ // auipc + fload_load ++ return (is_auipc_at(instr)) && ++ (is_addi_at(instr + instruction_size) || ++ is_jalr_at(instr + instruction_size) || ++ is_load_at(instr + instruction_size) || ++ is_float_load_at(instr + instruction_size)) && ++ check_pc_relative_data_dependency(instr); +} + -+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -+void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { -+ ShouldNotReachHere(); ++// ie:ld(Rd, Label) ++bool NativeInstruction::is_load_pc_relative_at(address instr) { ++ return is_auipc_at(instr) && // auipc ++ is_ld_at(instr + instruction_size) && // ld ++ check_load_pc_relative_data_dependency(instr); +} + -+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { -+ // Empty encoding ++bool NativeInstruction::is_movptr_at(address instr) { ++ return is_lui_at(instr) && // Lui ++ is_addi_at(instr + instruction_size) && // Addi ++ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 ++ is_addi_at(instr + instruction_size * 3) && // Addi ++ is_slli_shift_at(instr + instruction_size * 4, 
5) && // Slli Rd, Rs, 5 ++ (is_addi_at(instr + instruction_size * 5) || ++ is_jalr_at(instr + instruction_size * 5) || ++ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load ++ check_movptr_data_dependency(instr); +} + -+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { -+ return 0; ++bool NativeInstruction::is_li32_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addiw_at(instr + instruction_size) && // addiw ++ check_li32_data_dependency(instr); +} + -+#ifndef PRODUCT -+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { -+ assert_cond(st != NULL); -+ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++bool NativeInstruction::is_li64_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addi_at(instr + instruction_size) && // addi ++ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 3) && // addi ++ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 5) && // addi ++ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 ++ is_addi_at(instr + instruction_size * 7) && // addi ++ check_li64_data_dependency(instr); +} -+#endif + -+#ifndef PRODUCT -+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; ++void NativeCall::verify() { ++ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++} + -+ int framesize = C->frame_slots() << LogBytesPerInt; ++address NativeCall::destination() const { ++ address addr = (address)this; ++ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); ++ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ if (C->need_stack_bang(framesize)) { -+ st->print("# stack bang size=%d\n\t", framesize); ++ // Do we use a trampoline stub for this call? ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + -+ st->print("sub sp, sp, #%d\n\t", framesize); -+ st->print("sd fp, [sp, #%d]", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]", - wordSize); -+ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } ++ return destination; +} -+#endif + -+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ MacroAssembler _masm(&cbuf); ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), ++ "concurrent code patching"); + -+ // n.b. 
frame size includes space for return pc and fp -+ const int framesize = C->frame_size_in_bytes(); -+ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + -+ // insert a nop at the start of the prolog so we can patch in a -+ // branch if we need to invalidate the method later -+ __ nop(); ++ // Patch the constant in the call's trampoline stub. ++ address trampoline_stub_addr = get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } + -+ assert_cond(C != NULL); -+ int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize) && UseStackBanging) { -+ __ generate_stack_overflow_check(bangsize); ++ // Patch the call. ++ if (Assembler::reachable_from_branch_at(addr_call, dest)) { ++ set_destination(dest); ++ } else { ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ set_destination(trampoline_stub_addr); + } + -+ __ build_frame(framesize); ++ ICache::invalidate_range(addr_call, instruction_size); ++} + -+ if (VerifyStackAtCalls) { -+ Unimplemented(); -+ } ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); + -+ C->set_frame_complete(cbuf.insts_size()); ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); + -+ if (C->has_mach_constant_base_node()) { -+ // NOTE: We set the table base offset here because users might be -+ // emitted before MachConstantBaseNode. -+ Compile::ConstantTable& constant_table = C->constant_table(); -+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ address jal_destination = MacroAssembler::pd_call_destination(call_addr); ++ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { ++ return jal_destination; + } -+} + -+uint MachPrologNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); // too many variables; just compute it -+ // the hard way -+} ++ if (code != NULL && code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } + -+int MachPrologNode::reloc() const -+{ -+ return 0; ++ return NULL; +} + -+//============================================================================= -+ -+#ifndef PRODUCT -+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; -+ assert_cond(C != NULL); -+ int framesize = C->frame_size_in_bytes(); ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + -+ st->print("# pop frame %d\n\t", framesize); ++//------------------------------------------------------------------- + -+ if (framesize == 0) { -+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); -+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); -+ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); -+ } else { -+ st->print("add sp, sp, #%d\n\t", framesize); -+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); -+ st->print("ld fp, [sp,#%d]\n\t", - wordSize); ++void NativeMovConstReg::verify() { ++ if (!(nativeInstruction_at(instruction_address())->is_movptr() || ++ is_auipc_at(instruction_address()))) { ++ 
fatal("should be MOVPTR or AUIPC"); + } ++} + -+ if (do_polling() && C->is_method_compilation()) { -+ st->print("# touch polling page\n\t"); -+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); ++intptr_t NativeMovConstReg::data() const { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ if (maybe_cpool_ref(instruction_address())) { ++ return *(intptr_t*)addr; ++ } else { ++ return (intptr_t)addr; + } +} -+#endif -+ -+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ MacroAssembler _masm(&cbuf); -+ assert_cond(C != NULL); -+ int framesize = C->frame_size_in_bytes(); -+ -+ __ remove_frame(framesize); + -+ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { -+ __ reserved_stack_check(); ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (maybe_cpool_ref(instruction_address())) { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ *(intptr_t*)addr = x; ++ } else { ++ // Store x into the instruction stream. ++ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); + } + -+ if (do_polling() && C->is_method_compilation()) { -+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* cb = CodeCache::find_blob(instruction_address()); ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; ++ } ++ } + } +} + -+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ // Variable size. Determine dynamically. -+ return MachNode::size(ra_); ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); +} + -+int MachEpilogNode::reloc() const { -+ // Return number of relocatable values contained in this instruction. -+ return 1; // 1 for polling page. -+} -+const Pipeline * MachEpilogNode::pipeline() const { -+ return MachNode::pipeline_class(); ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const { ++ Unimplemented(); ++ return 0; +} + -+int MachEpilogNode::safepoint_offset() const { -+ assert(do_polling(), "no return for this epilog node"); -+ return 4; ++void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++ ++void NativeMovRegMem::verify() { ++ Unimplemented(); +} + -+//============================================================================= ++//-------------------------------------------------------------------------------- + -+// Figure out which register class each belongs in: rc_int, rc_float or -+// rc_stack. 
-+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++void NativeJump::verify() { } + -+static enum RC rc_class(OptoReg::Name reg) { + -+ if (reg == OptoReg::Bad) { -+ return rc_bad; -+ } ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++} + -+ // we have 30 int registers * 2 halves -+ // (t0 and t1 are omitted) -+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); -+ if (reg < slots_of_int_registers) { -+ return rc_int; -+ } + -+ // we have 32 float register * 2 halves -+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers) { -+ return rc_float; -+ } ++address NativeJump::jump_destination() const { ++ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) ++ // i.e. jump to 0 when we need leave space for a wide immediate ++ // load + -+ // Between vector regs & stack is the flags regs. -+ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); ++ // return -1 if jump to self or to 0 ++ if ((dest == (address) this) || dest == 0) { ++ dest = (address) -1; ++ } + -+ return rc_stack; -+} ++ return dest; ++}; + -+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); + -+ // Get registers to move. -+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); -+ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); -+ OptoReg::Name dst_hi = ra_->get_reg_second(this); -+ OptoReg::Name dst_lo = ra_->get_reg_first(this); ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} + -+ enum RC src_hi_rc = rc_class(src_hi); -+ enum RC src_lo_rc = rc_class(src_lo); -+ enum RC dst_hi_rc = rc_class(dst_hi); -+ enum RC dst_lo_rc = rc_class(dst_lo); ++//------------------------------------------------------------------- + -+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); ++address NativeGeneralJump::jump_destination() const { ++ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); ++ address dest = (address) move->data(); + -+ if (src_hi != OptoReg::Bad) { -+ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, -+ "expected aligned-adjacent pairs"); -+ } ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ // As a special case we also use jump to 0 when first generating ++ // a general jump + -+ if (src_lo == dst_lo && src_hi == dst_hi) { -+ return 0; // Self copy, no move. 
++ // return -1 if jump to self or to 0 ++ if ((dest == (address) this) || dest == 0) { ++ dest = (address) -1; + } + -+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; -+ int src_offset = ra_->reg2offset(src_lo); -+ int dst_offset = ra_->reg2offset(dst_lo); ++ return dest; ++} + -+ if (bottom_type() == NULL) { -+ ShouldNotReachHere(); -+ } else if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ MacroAssembler _masm(cbuf); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { -+ MacroAssembler _masm(cbuf); -+ switch (src_lo_rc) { -+ case rc_int: -+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy -+ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass -+ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); -+ } else { -+ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy -+ if (is64) { -+ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else { // gpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); -+ } -+ break; -+ case rc_float: -+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy -+ if (is64) { -+ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy -+ if (is64) { -+ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else { // fpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), -+ is64, dst_offset); -+ } -+ break; -+ case rc_stack: -+ if (dst_lo_rc == rc_int) { // stack --> gpr load -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } else { // // zero extended for narrow oop or klass -+ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } -+ } else if (dst_lo_rc == rc_float) { // stack --> fpr load -+ __ 
unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ is64, src_offset); -+ } else { // stack --> stack copy -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(t0, is64, src_offset); -+ } else { // zero extended for narrow oop or klass -+ __ unspillu(t0, is64, src_offset); -+ } -+ __ spill(t0, is64, dst_offset); -+ } -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++//------------------------------------------------------------------- + -+ if (st != NULL) { -+ st->print("spill "); -+ if (src_lo_rc == rc_stack) { -+ st->print("[sp, #%d] -> ", src_offset); -+ } else { -+ st->print("%s -> ", Matcher::regName[src_lo]); -+ } -+ if (dst_lo_rc == rc_stack) { -+ st->print("[sp, #%d]", dst_offset); -+ } else { -+ st->print("%s", Matcher::regName[dst_lo]); -+ } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } -+ } ++bool NativeInstruction::is_safepoint_poll() { ++ return is_lwu_to_zr(address(this)); ++} + -+ return 0; ++bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0000011 && ++ extract_funct3(instr) == 0b110 && ++ extract_rd(instr) == zr); // zr +} + -+#ifndef PRODUCT -+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ if (ra_ == NULL) { -+ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); -+ } else { -+ implementation(NULL, ra_, false, st); -+ } ++// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
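++// NativeJump::patch_verified_entry() below writes such an all-ones word over the verified
++// entry point when an nmethod is made not_entrant or zombie; executing it then traps with
++// SIGILL, which is what is_sigill_zombie_not_entrant() lets the signal handler detect.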
++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ // jvmci ++ return uint_at(0) == 0xffffffff; +} -+#endif + -+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ implementation(&cbuf, ra_, false, NULL); ++void NativeIllegalInstruction::insert(address code_pos) { ++ assert_cond(code_pos != NULL); ++ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction +} + -+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction +} + -+//============================================================================= ++//------------------------------------------------------------------- + -+#ifndef PRODUCT -+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(ra_ != NULL && st != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_reg_first(this); -+ st->print("add %s, sp, #%d\t# box lock", -+ Matcher::regName[reg], offset); -+} -+#endif ++// MT-safe inserting of a jump over a jump or a nop (used by ++// nmethod::make_not_entrant_or_zombie) + -+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ MacroAssembler _masm(&cbuf); ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + -+ assert_cond(ra_ != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_encode(this); ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + -+ if (is_imm_in_range(offset, 12, 0)) { -+ __ addi(as_Register(reg), sp, offset); -+ } else if (is_imm_in_range(offset, 32, 0)) { -+ __ li32(t0, offset); -+ __ add(as_Register(reg), sp, t0); -+ } else { -+ ShouldNotReachHere(); -+ } -+} ++ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || ++ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), ++ "riscv cannot replace non-jump with jump"); + -+uint BoxLockNode::size(PhaseRegAlloc *ra_) const { -+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ // Patch this nmethod atomically. ++ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { ++ ptrdiff_t offset = dest - verified_entry; ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M + -+ if (is_imm_in_range(offset, 12, 0)) { -+ return NativeInstruction::instruction_size; ++ uint32_t insn = 0; ++ address pInsn = (address)&insn; ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump ++ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) ++ *(unsigned int*)verified_entry = insn; + } else { -+ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie. 
++ NativeIllegalInstruction::insert(verified_entry); + } ++ ++ ICache::invalidate_range(verified_entry, instruction_size); +} + -+//============================================================================= ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler a(&cb); + -+#ifndef PRODUCT -+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const -+{ -+ assert_cond(st != NULL); -+ st->print_cr("# MachUEPNode"); -+ if (UseCompressedClassPointers) { -+ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ if (Universe::narrow_klass_shift() != 0) { -+ st->print_cr("\tdecode_klass_not_null t0, t0"); -+ } -+ } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ } -+ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ int32_t offset = 0; ++ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli ++ a.jalr(x0, t0, offset); // jalr ++ ++ ICache::invalidate_range(code_pos, instruction_size); +} -+#endif + -+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const -+{ -+ // This is the unverified entry point. -+ MacroAssembler _masm(&cbuf); ++// MT-safe patching of a long jump instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ ShouldNotCallThis(); ++} + -+ Label skip; -+ __ cmp_klass(j_rarg0, t1, t0, skip); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ bind(skip); ++ ++address NativeCallTrampolineStub::destination(nmethod *nm) const { ++ return ptr_at(data_offset); +} + -+uint MachUEPNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); ++void NativeCallTrampolineStub::set_destination(address new_destination) { ++ set_ptr_at(data_offset, new_destination); ++ OrderAccess::release(); +} + -+// REQUIRED EMIT CODE ++uint32_t NativeMembar::get_kind() { ++ uint32_t insn = uint_at(0); + -+//============================================================================= ++ uint32_t predecessor = Assembler::extract(insn, 27, 24); ++ uint32_t successor = Assembler::extract(insn, 23, 20); + -+// Emit exception handler code. -+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) -+{ -+ // la_patchable t0, #exception_blob_entry_point -+ // jr (offset)t0 -+ // or -+ // j #exception_blob_entry_point -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. -+ MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_exception_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); -+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); -+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; ++ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); +} + -+// Emit deopt handler code. -+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) -+{ -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. 
-+ MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_deopt_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); ++void NativeMembar::set_kind(uint32_t order_kind) { ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; + -+ __ auipc(ra, 0); -+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); + -+ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; ++ uint32_t insn = uint_at(0); ++ address pInsn = (address) &insn; ++ Assembler::patch(pInsn, 27, 24, predecessor); ++ Assembler::patch(pInsn, 23, 20, successor); + ++ address membar = addr_at(0); ++ *(unsigned int*) membar = insn; +} -+// REQUIRED MATCHER CODE +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +new file mode 100644 +index 00000000000..718b2e3de6c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +@@ -0,0 +1,572 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+//============================================================================= ++#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP ++#define CPU_RISCV_NATIVEINST_RISCV_HPP + -+const bool Matcher::match_rule_supported(int opcode) { -+ if (!has_match_rule(opcode)) { -+ return false; -+ } ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" + -+ switch (opcode) { -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_HasNegatives: -+ return UseRVV; -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; -+ case Op_PopCountI: -+ case Op_PopCountL: -+ return UsePopCountInstruction; -+ case Op_CountLeadingZerosI: -+ case Op_CountLeadingZerosL: -+ case Op_CountTrailingZerosI: -+ case Op_CountTrailingZerosL: -+ return UseZbb; -+ } ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovRegMem ++// - - NativeJump ++// - - NativeGeneralJump ++// - - NativeIllegalInstruction ++// - - NativeCallTrampolineStub ++// - - NativeMembar ++// - - NativeFenceI + -+ return true; // Per default match rules are supported. -+} ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. + -+// Identify extra cases that we might want to provide match rules for vector nodes and -+// other intrinsics guarded with vector length (vlen). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; ++class NativeCall; ++ ++class NativeInstruction { ++ friend class Relocation; ++ friend bool is_NativeCallTrampolineStub_at(address); ++ public: ++ enum { ++ instruction_size = 4, ++ compressed_instruction_size = 2, ++ }; ++ ++ juint encoding() const { ++ return uint_at(0); + } + -+ return op_vec_supported(opcode); -+} ++ bool is_jal() const { return is_jal_at(addr_at(0)); } ++ bool is_movptr() const { return is_movptr_at(addr_at(0)); } ++ bool is_call() const { return is_call_at(addr_at(0)); } ++ bool is_jump() const { return is_jump_at(addr_at(0)); } + -+const bool Matcher::has_predicated_vectors(void) { -+ return false; // not supported ++ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } ++ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } ++ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } ++ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } ++ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } ++ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } ++ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } ++ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } ++ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } ++ static bool is_addiw_at(address instr) { 
assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } ++ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } ++ static bool is_slli_shift_at(address instr, uint32_t shift) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0010011 && // opcode field ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field ++ } + -+} ++ static Register extract_rs1(address instr); ++ static Register extract_rs2(address instr); ++ static Register extract_rd(address instr); ++ static uint32_t extract_opcode(address instr); ++ static uint32_t extract_funct3(address instr); + -+const int Matcher::float_pressure(int default_pressure_threshold) { -+ return default_pressure_threshold; -+} ++ // the instruction sequence of movptr is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi/jalr/load ++ static bool check_movptr_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address last_instr = slli2 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(last_instr) == extract_rd(slli2); ++ } + -+int Matcher::regnum_to_fpu_offset(int regnum) -+{ -+ Unimplemented(); -+ return 0; -+} ++ // the instruction sequence of li64 is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ static bool check_li64_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address addi3 = slli2 + instruction_size; ++ address slli3 = addi3 + instruction_size; ++ address addi4 = slli3 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(addi4); ++ } + -+// Is this branch offset short enough that a short branch can be used? -+// -+// NOTE: If the platform does not provide any short branch variants, then -+// this method should return false for offset 0. 
-+// |---label(L1)-----| -+// |-----------------| -+// |-----------------|----------eq: float------------------- -+// |-----------------| // far_cmpD_branch | cmpD_branch -+// |------- ---------| feq; | feq; -+// |-far_cmpD_branch-| beqz done; | bnez L; -+// |-----------------| j L; | -+// |-----------------| bind(done); | -+// |-----------------|-------------------------------------- -+// |-----------------| // so shortBrSize = br_size - 4; -+// |-----------------| // so offs = offset - shortBrSize + 4; -+// |---label(L2)-----| -+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { -+ // The passed offset is relative to address of the branch. -+ int shortBrSize = br_size - 4; -+ int offs = offset - shortBrSize + 4; -+ return (-4096 <= offs && offs < 4096); -+} ++ // the instruction sequence of li32 is as below: ++ // lui ++ // addiw ++ static bool check_li32_data_dependency(address instr) { ++ address lui = instr; ++ address addiw = lui + instruction_size; + -+const bool Matcher::isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+} ++ return extract_rs1(addiw) == extract_rd(lui) && ++ extract_rs1(addiw) == extract_rd(addiw); ++ } + -+// true just means we have fast l2f conversion -+const bool Matcher::convL2FSupported(void) { -+ return true; -+} ++ // the instruction sequence of pc-relative is as below: ++ // auipc ++ // jalr/addi/load/float_load ++ static bool check_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; + -+// Vector width in bytes. -+const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; ++ return extract_rs1(last_instr) == extract_rd(auipc); + } -+ return 0; -+} + -+// Limits on vector size (number of elements) loaded into vector. -+const int Matcher::max_vector_size(const BasicType bt) { -+ return vector_width_in_bytes(bt) / type2aelembytes(bt); -+} -+const int Matcher::min_vector_size(const BasicType bt) { -+ return max_vector_size(bt); -+} ++ // the instruction sequence of load_label is as below: ++ // auipc ++ // load ++ static bool check_load_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address load = auipc + instruction_size; + -+// Vector ideal reg. 
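// Illustration, assuming a 128-bit RVV implementation (MaxVectorSize == 16):
// vector_width_in_bytes() above returns 16, so max_vector_size(T_INT) ==
// min_vector_size(T_INT) == 4 elements, and vector_ideal_reg() below maps every
// supported length onto the scalable Op_VecA register class.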
-+const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); + } + -+ ShouldNotReachHere(); -+ return 0; -+} ++ static bool is_movptr_at(address instr); ++ static bool is_li32_at(address instr); ++ static bool is_li64_at(address instr); ++ static bool is_pc_relative_at(address branch); ++ static bool is_load_pc_relative_at(address branch); + -+const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ switch(size) { -+ case 8: return Op_VecD; -+ case 16: return Op_VecX; -+ default: -+ if (size == vector_width_in_bytes(T_BYTE)) { -+ return Op_VecA; -+ } ++ static bool is_call_at(address instr) { ++ if (is_jal_at(instr) || is_jalr_at(instr)) { ++ return true; ++ } ++ return false; + } -+ ShouldNotReachHere(); -+ return 0; -+} ++ static bool is_lwu_to_zr(address instr); + -+const bool Matcher::supports_scalable_vector() { -+ return UseRVV; -+} ++ inline bool is_nop(); ++ inline bool is_jump_or_nop(); ++ bool is_safepoint_poll(); ++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); -+} ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } + -+// AES support not yet implemented -+const bool Matcher::pass_original_key_for_aes() { -+ return false; -+} ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + -+// riscv supports misaligned vectors store/load. -+const bool Matcher::misaligned_vectors_ok() { -+ return true; -+} ++ address ptr_at(int offset) const { return *(address*) addr_at(offset); } + -+// false => size gets scaled to BytesPerLong, ok. -+const bool Matcher::init_array_count_is_in_bytes = false; ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + -+// Use conditional move (CMOVL) -+const int Matcher::long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+} + -+const int Matcher::float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+} ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } ++ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } ++ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + -+// Does the CPU require late expand (see block.cpp for description of late expand)? -+const bool Matcher::require_postalloc_expand = false; ++ public: + -+// Do we need to mask the count passed to shift instructions or does -+// the cpu only look at the lower 5/6 bits anyway? -+const bool Matcher::need_masked_shift_count = false; ++ inline friend NativeInstruction* nativeInstruction_at(address addr); + -+// This affects two different things: -+// - how Decode nodes are matched -+// - how ImplicitNullCheck opportunities are recognized -+// If true, the matcher will try to remove all Decodes and match them -+// (as operands) into nodes. NullChecks are not prepared to deal with -+// Decodes by final_graph_reshaping(). -+// If false, final_graph_reshaping() forces the decode behind the Cmp -+// for a NullCheck. The matcher matches the Decode node into a register. 
-+// Implicit_null_check optimization moves the Decode along with the -+// memory operation back up before the NullCheck. -+bool Matcher::narrow_oop_use_complex_address() { -+ return Universe::narrow_oop_shift() == 0; -+} ++ static bool maybe_cpool_ref(address instr) { ++ return is_auipc_at(instr); ++ } + -+bool Matcher::narrow_klass_use_complex_address() { -+// TODO -+// decide whether we need to set this to true -+ return false; -+} ++ bool is_membar() { ++ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; ++ } ++}; + -+bool Matcher::const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return Universe::narrow_oop_base() == NULL; ++inline NativeInstruction* nativeInstruction_at(address addr) { ++ return (NativeInstruction*)addr; +} + -+bool Matcher::const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return Universe::narrow_klass_base() == NULL; ++// The natural type of an RISCV instruction is uint32_t ++inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { ++ return (NativeInstruction*)addr; +} + -+// Is it better to copy float constants, or load them directly from -+// memory? Intel can load a float constant from a direct address, -+// requiring no extra registers. Most RISCs will have to materialize -+// an address into a register first, so they would do better to copy -+// the constant from stack. -+const bool Matcher::rematerialize_float_constants = false; ++inline NativeCall* nativeCall_at(address addr); ++// The NativeCall is an abstraction for accessing/manipulating native ++// call instructions (used to manipulate inline caches, primitive & ++// DSO calls, etc.). + -+// If CPU can load and store mis-aligned doubles directly then no -+// fixup is needed. Else we split the double into 2 integer pieces -+// and move it piece-by-piece. Only happens when passing doubles into -+// C code as the Java calling convention forces doubles to be aligned. -+const bool Matcher::misaligned_doubles_ok = true; ++class NativeCall: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = 4, ++ instruction_offset = 0, ++ displacement_offset = 0, ++ return_address_offset = 4 ++ }; + -+// No-op on amd64 -+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -+ Unimplemented(); -+} ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; + -+// Advertise here if the CPU requires explicit rounding operations to -+// implement the UseStrictFP mode. 
-+const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); ++ } + -+// Are floats converted to double when stored to stack during -+// deoptimization? -+bool Matcher::float_in_double() { return false; } ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + -+// Do ints take an entire long register or just half? -+// The relevant question is how the int is callee-saved: -+// the whole long is written but de-opt'ing will have to extract -+// the relevant 32 bits. -+const bool Matcher::int_in_long = true; ++ // Creation ++ inline friend NativeCall* nativeCall_at(address addr); ++ inline friend NativeCall* nativeCall_before(address return_address); + -+// Return whether or not this register is ever used as an argument. -+// This function is used on startup to build the trampoline stubs in -+// generateOptoStub. Registers not mentioned will be killed by the VM -+// call in the trampoline, and arguments in those registers not be -+// available to the callee. -+bool Matcher::can_be_java_arg(int reg) -+{ -+ return -+ reg == R10_num || reg == R10_H_num || -+ reg == R11_num || reg == R11_H_num || -+ reg == R12_num || reg == R12_H_num || -+ reg == R13_num || reg == R13_H_num || -+ reg == R14_num || reg == R14_H_num || -+ reg == R15_num || reg == R15_H_num || -+ reg == R16_num || reg == R16_H_num || -+ reg == R17_num || reg == R17_H_num || -+ reg == F10_num || reg == F10_H_num || -+ reg == F11_num || reg == F11_H_num || -+ reg == F12_num || reg == F12_H_num || -+ reg == F13_num || reg == F13_H_num || -+ reg == F14_num || reg == F14_H_num || -+ reg == F15_num || reg == F15_H_num || -+ reg == F16_num || reg == F16_H_num || -+ reg == F17_num || reg == F17_H_num; -+} ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); ++ } + -+bool Matcher::is_spillable_arg(int reg) -+{ -+ return can_be_java_arg(reg); -+} ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); + -+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { -+ return false; -+} ++ static void replace_mt_safe(address instr_addr, address code_buffer); + -+RegMask Matcher::divI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate BL ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. + -+// Register for MODI projection of divmodI. 
-+RegMask Matcher::modI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + -+// Register for DIVL projection of divmodL. -+RegMask Matcher::divL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); + -+// Register for MODL projection of divmodL. -+RegMask Matcher::modL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; +} + -+const RegMask Matcher::method_handle_invoke_SP_save_mask() { -+ return FP_REG_mask(); ++inline NativeCall* nativeCall_before(address return_address) { ++ assert_cond(return_address != NULL); ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; +} + -+bool size_fits_all_mem_uses(AddPNode* addp, int shift) { -+ assert_cond(addp != NULL); -+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { -+ Node* u = addp->fast_out(i); -+ if (u != NULL && u->is_Mem()) { -+ int opsize = u->as_Mem()->memory_size(); -+ assert(opsize > 0, "unexpected memory operand size"); -+ if (u->as_Mem()->memory_size() != (1 << shift)) { -+ return false; ++// An interface for accessing/manipulating native mov reg, imm instructions. ++// (used to manipulate inlined 64-bit data calls, etc.) ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). ++ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). ++ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld ++ instruction_offset = 0, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ // if the instruction at 5 * instruction_size is addi, ++ // it means a lui + addi + slli + addi + slli + addi instruction sequence, ++ // and the next instruction address should be addr_at(6 * instruction_size). 
++ // However, when the instruction at 5 * instruction_size isn't addi, ++ // the next instruction address should be addr_at(5 * instruction_size) ++ if (nativeInstruction_at(instruction_address())->is_movptr()) { ++ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { ++ // Assume: lui, addi, slli, addi, slli, addi ++ return addr_at(movptr_instruction_size); ++ } else { ++ // Assume: lui, addi, slli, addi, slli ++ return addr_at(movptr_with_offset_instruction_size); + } ++ } else if (is_load_pc_relative_at(instruction_address())) { ++ // Assume: auipc, ld ++ return addr_at(load_pc_relative_instruction_size); + } ++ guarantee(false, "Unknown instruction in NativeMovConstReg"); ++ return NULL; + } -+ return true; -+} -+ -+const bool Matcher::convi2l_type_required = false; + -+// Should the Matcher clone shifts on addressing modes, expecting them -+// to be subsumed into complex addressing expressions or compute them -+// into registers? -+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+ assert_cond(m != NULL); -+ if (clone_base_plus_offset_address(m, mstack, address_visited)) { -+ return true; -+ } ++ intptr_t data() const; ++ void set_data(intptr_t x); + -+ Node *off = m->in(AddPNode::Offset); -+ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && -+ size_fits_all_mem_uses(m, off->in(2)->get_int()) && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(2), Visit); -+ Node *conv = off->in(1); -+ if (conv->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(conv)) { -+ address_visited.set(conv->_idx); // Flag as address_visited -+ mstack.push(conv->in(1), Pre_Visit); -+ } else { -+ mstack.push(conv, Pre_Visit); ++ void flush() { ++ if (!maybe_cpool_ref(instruction_address())) { ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); + } -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } else if (off != NULL && off->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(1), Pre_Visit); -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; + } -+ return false; -+} -+ -+void Compile::reshape_address(AddPNode* addp) { -+} -+ -+%} + ++ void verify(); ++ void print(); + ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); ++}; + -+//----------ENCODING BLOCK----------------------------------------------------- -+// This block specifies the encoding classes used by the compiler to -+// output byte streams. Encoding classes are parameterized macros -+// used by Machine Instruction Nodes in order to generate the bit -+// encoding of the instruction. Operands specify their base encoding -+// interface with the interface keyword. There are currently -+// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -+// COND_INTER. 
REG_INTER causes an operand to generate a function -+// which returns its register number when queried. CONST_INTER causes -+// an operand to generate a function which returns the value of the -+// constant when queried. MEMORY_INTER causes an operand to generate -+// four functions which return the Base Register, the Index Register, -+// the Scale Value, and the Offset Value of the operand when queried. -+// COND_INTER causes an operand to generate six functions which return -+// the encoding code (ie - encoding bits for the instruction) -+// associated with each basic boolean condition for a conditional -+// instruction. -+// -+// Instructions specify two basic values for encoding. Again, a -+// function is available to check if the constant displacement is an -+// oop. They use the ins_encode keyword to specify their encoding -+// classes (which must be a sequence of enc_class names, and their -+// parameters, specified in the encoding block), and they use the -+// opcode keyword to specify, in order, their primary, secondary, and -+// tertiary opcode. Only the opcode sections which a particular -+// instruction needs for encoding need to be specified. -+encode %{ -+ // BEGIN Non-volatile memory access ++inline NativeMovConstReg* nativeMovConstReg_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} + -+ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -+ MacroAssembler _masm(&cbuf); -+ int64_t con = (int64_t)$src$$constant; -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, con); -+ %} ++inline NativeMovConstReg* nativeMovConstReg_before(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} + -+ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL || con == (address)1) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ if (rtype == relocInfo::oop_type) { -+ __ movoop(dst_reg, (jobject)con, /*immediate*/true); -+ } else if (rtype == relocInfo::metadata_type) { -+ __ mov_metadata(dst_reg, (Metadata*)con); -+ } else { -+ assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ mv(dst_reg, $src$$constant); -+ } -+ } -+ %} ++// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. 
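// A plausible sketch of is_movptr_at(), which is_movptr() and the
// next_instruction_address() logic above rely on, expressed with the helpers
// declared earlier in this header.  The shift amounts (11 and 6) are assumptions
// based on the lui/addi/slli/addi/slli/addi split of a 48-bit address; the
// authoritative definition is expected in nativeInst_riscv.cpp.
//
//   bool NativeInstruction::is_movptr_at(address instr) {
//     return is_lui_at(instr) &&                                   // lui
//            is_addi_at(instr + instruction_size) &&               // addi
//            is_slli_shift_at(instr + 2 * instruction_size, 11) && // slli rd, rs, 11
//            is_addi_at(instr + 3 * instruction_size) &&           // addi
//            is_slli_shift_at(instr + 4 * instruction_size, 6) &&  // slli rd, rs, 6
//            (is_addi_at(instr + 5 * instruction_size) ||
//             is_jalr_at(instr + 5 * instruction_size) ||
//             is_load_at(instr + 5 * instruction_size)) &&         // addi/jalr/load
//            check_movptr_data_dependency(instr);
//   }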
++class NativeMovRegMem: public NativeInstruction { ++ public: ++ int instruction_start() const { ++ Unimplemented(); ++ return 0; ++ } + -+ enc_class riscv_enc_mov_p1(iRegP dst) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, 1); -+ %} ++ address instruction_address() const { ++ Unimplemented(); ++ return NULL; ++ } + -+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ -+ MacroAssembler _masm(&cbuf); -+ int32_t offset = 0; -+ address page = (address)$src$$constant; -+ unsigned long align = (unsigned long)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ Register dst_reg = as_Register($dst$$reg); -+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); -+ __ addi(dst_reg, dst_reg, offset); -+ %} ++ int num_bytes_to_end_of_patch() const { ++ Unimplemented(); ++ return 0; ++ } + -+ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -+ MacroAssembler _masm(&cbuf); -+ __ load_byte_map_base($dst$$Register); -+ %} ++ int offset() const; + -+ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); -+ __ set_narrow_oop(dst_reg, (jobject)con); -+ } -+ %} ++ void set_offset(int x); + -+ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, zr); -+ %} ++ void add_offset_in_bytes(int add_offset) { Unimplemented(); } + -+ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); -+ __ set_narrow_klass(dst_reg, (Klass *)con); -+ } -+ %} ++ void verify(); ++ void print(); + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); ++}; + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++inline NativeMovRegMem* nativeMovRegMem_at (address addr) { ++ Unimplemented(); ++ return NULL; ++} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++class NativeJump: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ 
instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ address jump_destination() const; ++ void set_jump_destination(address dest); + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ void verify(); + -+ // compare and branch instruction encodings ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; + -+ enc_class riscv_enc_j(label lbl) %{ -+ MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ __ j(*L); -+ %} ++inline NativeJump* nativeJump_at(address addr) { ++ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); ++#ifdef ASSERT ++ jump->verify(); ++#endif ++ return jump; ++} + -+ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -+ MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ switch($cmp$$cmpcode) { -+ case(BoolTest::ge): -+ __ j(*L); -+ break; -+ case(BoolTest::lt): -+ break; -+ default: -+ Unimplemented(); -+ } -+ %} ++class NativeGeneralJump: public NativeJump { ++public: ++ enum RISCV_specific_constants { ++ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr ++ }; + -+ // call instruction encodings ++ address jump_destination() const; + -+ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ -+ Register sub_reg = as_Register($sub$$reg); -+ Register super_reg = as_Register($super$$reg); -+ Register temp_reg = as_Register($temp$$reg); -+ Register result_reg = as_Register($result$$reg); -+ Register cr_reg = t1; ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; + -+ Label miss; -+ Label done; -+ MacroAssembler _masm(&cbuf); -+ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, -+ NULL, &miss); -+ if ($primary) { -+ __ 
mv(result_reg, zr); -+ } else { -+ __ mv(cr_reg, zr); -+ __ j(done); -+ } ++inline NativeGeneralJump* nativeGeneralJump_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); ++ debug_only(jump->verify();) ++ return jump; ++} + -+ __ bind(miss); -+ if (!$primary) { -+ __ mv(cr_reg, 1); -+ } ++class NativeIllegalInstruction: public NativeInstruction { ++ public: ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; + -+ __ bind(done); -+ %} ++inline bool NativeInstruction::is_nop() { ++ uint32_t insn = *(uint32_t*)addr_at(0); ++ return insn == 0x13; ++} + -+ enc_class riscv_enc_java_static_call(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++inline bool NativeInstruction::is_jump_or_nop() { ++ return is_nop() || is_jump(); ++} + -+ address addr = (address)$meth$$method; -+ address call = NULL; -+ assert_cond(addr != NULL); -+ if (!_method) { -+ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ int method_index = resolved_method_index(cbuf); -+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) -+ : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call); -+ if (stub == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: + -+ %} ++ enum RISCV_specific_constants { ++ // Refer to function emit_trampoline_stub. ++ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address ++ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr ++ }; + -+ enc_class riscv_enc_java_dynamic_call(method meth) %{ -+ MacroAssembler _masm(&cbuf); -+ int method_index = resolved_method_index(cbuf); -+ address call = __ ic_call((address)$meth$$method, method_index); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ %} ++ address destination(nmethod *nm = NULL) const; ++ void set_destination(address new_destination); ++ ptrdiff_t destination_offset() const; ++}; + -+ enc_class riscv_enc_call_epilog() %{ -+ MacroAssembler _masm(&cbuf); -+ if (VerifyStackAtCalls) { -+ // Check that stack depth is unchanged: find majik cookie on stack -+ __ call_Unimplemented(); -+ } -+ %} ++inline bool is_NativeCallTrampolineStub_at(address addr) { ++ // Ensure that the stub is exactly ++ // ld t0, L--->auipc + ld ++ // jr t0 ++ // L: + -+ enc_class riscv_enc_java_to_runtime(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++ // judge inst + register + imm ++ // 1). check the instructions: auipc + ld + jalr ++ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 ++ // 3). 
check if the offset in ld[31:20] equals the data_offset ++ assert_cond(addr != NULL); ++ const int instr_size = NativeInstruction::instruction_size; ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ (NativeInstruction::extract_rd(addr) == x5) && ++ (NativeInstruction::extract_rd(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && ++ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { ++ return true; ++ } ++ return false; ++} + -+ // some calls to generated routines (arraycopy code) are scheduled -+ // by C2 as runtime calls. if so we can call them using a jr (they -+ // will be in a reachable segment) otherwise we have to use a jalr -+ // which loads the absolute address into a register. -+ address entry = (address)$meth$$method; -+ CodeBlob *cb = CodeCache::find_blob(entry); -+ if (cb != NULL) { -+ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ Label retaddr; -+ __ la(t1, retaddr); -+ __ la(t0, RuntimeAddress(entry)); -+ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(t1, Address(sp, wordSize)); -+ __ jalr(t0); -+ __ bind(retaddr); -+ __ addi(sp, sp, 2 * wordSize); -+ } -+ %} ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} + -+ // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; ++class NativeMembar : public NativeInstruction { ++public: ++ uint32_t get_kind(); ++ void set_kind(uint32_t order_kind); ++}; + -+ assert_different_registers(oop, box, tmp, disp_hdr, t0); ++inline NativeMembar *NativeMembar_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); ++ return (NativeMembar*)addr; ++} + -+ // Load markOop from object into displaced_header. -+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); ++class NativeFenceI : public NativeInstruction { ++public: ++ static inline int instruction_size() { ++ // 2 for fence.i + fence ++ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; ++ } ++}; + -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } ++#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +new file mode 100644 +index 00000000000..26c1edc36ff +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ // ignore slow case here -+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); -+ } ++#include "precompiled.hpp" ++#include "runtime/registerMap.hpp" ++#include "vmreg_riscv.inline.hpp" + -+ // Check for existing monitor -+ if ((EmitSync & 0x02) == 0) { -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); ++address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { ++ if (base_reg->is_VectorRegister()) { ++ assert(base_reg->is_concrete(), "must pass base reg"); ++ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / ++ VectorRegisterImpl::max_slots_per_register; ++ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; ++ address base_location = location(base_reg); ++ if (base_location != NULL) { ++ return base_location + offset_in_bytes; ++ } else { ++ return NULL; + } ++ } else { ++ return location(base_reg->next(slot_idx)); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +new file mode 100644 +index 00000000000..f34349811a9 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Set tmp to be (markOop of object | UNLOCK_VALUE). 
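// For reference, the low two mark-word bits this sequence depends on are:
//   01 - unlocked            (markOopDesc::unlocked_value)
//   00 - stack-locked        (displaced header stored in the on-stack BasicLock)
//   10 - inflated / monitor  (markOopDesc::monitor_value)
// The ori below builds the "expected unlocked" mark, and the following CAS can
// therefore only succeed while the object is actually unlocked.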
-+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markOop with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markOop. -+ // On failure disp_hdr contains the possibly locked markOop. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markOop of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here ++#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP ++#define CPU_RISCV_REGISTERMAP_RISCV_HPP + -+ if ((EmitSync & 0x02) == 0) { -+ __ j(cont); ++// machine-dependent implemention for register maps ++ friend class frame; + -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ address pd_location(VMReg reg) const { return NULL; } ++ address pd_location(VMReg base_reg, int slot_idx) const; + -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. 
-+ __ mv(tmp, (address)markOopDesc::unused_mark()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ } ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} + -+ __ bind(cont); -+ %} ++#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +new file mode 100644 +index 00000000000..f8116e9df8c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; ++#include "precompiled.hpp" ++#include "register_riscv.hpp" + -+ assert_different_registers(oop, box, tmp, disp_hdr, flag); ++REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); + -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; + -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } ++const int ConcreteRegisterImpl::max_fpr = ++ ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++const int ConcreteRegisterImpl::max_vpr = ++ ConcreteRegisterImpl::max_fpr + ++ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; + -+ // If the displaced header is 0, we have a recursive unlock. 
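// Example: a nested synchronized block on the same object stored 0 as the
// displaced header in fast_lock above, so disp_hdr == 0 here; flag becomes 0
// (success) and the branch below leaves the object's mark word untouched.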
-+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); + -+ // Handle existing monitor. -+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } ++const char* RegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", ++ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", ++ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", ++ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markOop of the -+ // object. ++const char* FloatRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ __ j(cont); ++const char* VectorRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", ++ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", ++ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +new file mode 100644 +index 00000000000..a9200cac647 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -0,0 +1,324 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++#ifndef CPU_RISCV_REGISTER_RISCV_HPP ++#define CPU_RISCV_REGISTER_RISCV_HPP + -+ // Handle existing monitor. 
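// An inflated lock is recognized by the monitor_value bit (0b10); in that case
// the code under object_has_monitor below works on the ObjectMonitor fields
// (owner, recursions, cxq, EntryList) instead of the on-stack BasicLock.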
-+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); ++#include "asm/register.hpp" + -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } ++#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. ++#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. ++#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). ++#define CSR_VSTART 0x008 // Vector start position ++#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag ++#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode ++#define CSR_VCSR 0x00F // Vector control and status register ++#define CSR_VL 0xC20 // Vector length ++#define CSR_VTYPE 0xC21 // Vector data type register ++#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) ++#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. ++#define CSR_TIME 0xc01 // Timer for RDTIME instruction. ++#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. + -+ __ bind(cont); -+ %} ++class VMRegImpl; ++typedef VMRegImpl* VMReg; + -+ // arithmetic encodings ++// Use Register as shortcut ++class RegisterImpl; ++typedef const RegisterImpl* Register; + -+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); -+ %} ++inline constexpr Register as_Register(int encoding); + -+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); -+ %} ++class RegisterImpl: public AbstractRegisterImpl { ++ static constexpr Register first(); + -+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); -+ %} ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); -+ %} ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. 
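// (Illustration: RVC register fields are 3 bits wide and encode x8..x15 as 0..7,
// so x10/a0 has compressed_encoding() == 2 and x15 has 7; see compressed_encoding()
// below.)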
++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ __ jr(target_reg); -+ %} ++ // derived registers, offsets, and addresses ++ const Register successor() const { return this + 1; } + -+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ // exception oop should be in x10 -+ // ret addr has been popped into ra -+ // callee expects it in x13 -+ __ mv(x13, ra); -+ __ jr(target_reg); -+ %} ++ // construction ++ inline friend constexpr Register as_Register(int encoding); + -+ enc_class riscv_enc_rethrow() %{ -+ MacroAssembler _masm(&cbuf); -+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); -+ %} ++ VMReg as_VMReg() const; + -+ enc_class riscv_enc_ret() %{ -+ MacroAssembler _masm(&cbuf); -+ __ ret(); -+ %} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+%} ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + -+//----------FRAME-------------------------------------------------------------- -+// Definition of frame structure and management information. -+// -+// S T A C K L A Y O U T Allocators stack-slot number -+// | (to get allocators register number -+// G Owned by | | v add OptoReg::stack0()) -+// r CALLER | | -+// o | +--------+ pad to even-align allocators stack-slot -+// w V | pad0 | numbers; owned by CALLER -+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -+// h ^ | in | 5 -+// | | args | 4 Holes in incoming args owned by SELF -+// | | | | 3 -+// | | +--------+ -+// V | | old out| Empty on Intel, window on Sparc -+// | old |preserve| Must be even aligned. -+// | SP-+--------+----> Matcher::_old_SP, even aligned -+// | | in | 3 area for Intel ret address -+// Owned by |preserve| Empty on Sparc. -+// SELF +--------+ -+// | | pad2 | 2 pad to align old SP -+// | +--------+ 1 -+// | | locks | 0 -+// | +--------+----> OptoReg::stack0(), even aligned -+// | | pad1 | 11 pad to align new SP -+// | +--------+ -+// | | | 10 -+// | | spills | 9 spills -+// V | | 8 (pad0 slot for callee) -+// -----------+--------+----> Matcher::_out_arg_limit, unaligned -+// ^ | out | 7 -+// | | args | 6 Holes in outgoing args owned by CALLEE -+// Owned by +--------+ -+// CALLEE | new out| 6 Empty on Intel, window on Sparc -+// | new |preserve| Must be even-aligned. -+// | SP-+--------+----> Matcher::_new_SP, even aligned -+// | | | -+// -+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -+// known from SELF's arguments and the Java calling convention. -+// Region 6-7 is determined per call site. -+// Note 2: If the calling convention leaves holes in the incoming argument -+// area, those holes are owned by SELF. Holes in the outgoing area -+// are owned by the CALLEE. Holes should not be nessecary in the -+// incoming area, as the Java calling convention is completely under -+// the control of the AD file. Doubles can be sorted and packed to -+// avoid holes. Holes in the outgoing arguments may be nessecary for -+// varargs C calling conventions. 
-+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -+// even aligned with pad0 as needed. -+// Region 6 is even aligned. Region 6-7 is NOT even aligned; -+// (the latter is true on Intel but is it false on RISCV?) -+// region 6-11 is even aligned; it may be padded out more so that -+// the region from SP to FP meets the minimum stack alignment. -+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack -+// alignment. Region 11, pad1, may be dynamically extended so that -+// SP meets the minimum alignment. ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } + -+frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } ++}; + -+ // These three registers define part of the calling convention -+ // between compiled code and the interpreter. ++REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); + -+ // Inline Cache Register or methodOop for I2C. -+ inline_cache_reg(R31); ++// The integer registers of the RISCV architecture + -+ // Method Oop Register when calling interpreter. -+ interpreter_method_oop_reg(R31); ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] -+ cisc_spilling_operand_name(indOffset); ++CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + -+ // Number of stack slots consumed by locking an object -+ // generate Compile::sync_stack_slots -+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 -+ sync_stack_slots(1 * VMRegImpl::slots_per_word); ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; 
++typedef const FloatRegisterImpl* FloatRegister; + -+ // Compiled code's Frame Pointer -+ frame_pointer(R2); ++inline constexpr FloatRegister as_FloatRegister(int encoding); + -+ // Interpreter stores its frame pointer in a register which is -+ // stored to the stack by I2CAdaptors. -+ // I2CAdaptors convert from interpreted java to compiled java. -+ interpreter_frame_pointer(R8); ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ static constexpr FloatRegister first(); + -+ // Stack alignment requirement -+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. -+ // RISCV needs two words for RA (return address) and FP (frame pointer). -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ // Number of outgoing stack slots killed above the out_preserve_stack_slots -+ // for calls to C. Supports the var-args backing area for register parms. -+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); ++ // construction ++ inline friend constexpr FloatRegister as_FloatRegister(int encoding); + -+ // The after-PROLOG location of the return address. Location of -+ // return address specifies a type (REG or STACK) and a number -+ // representing the register number (i.e. - use a register name) or -+ // stack slot. -+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. -+ // Otherwise, it is above the locks and verification slot and alignment word -+ // TODO this may well be correct but need to check why that - 2 is there -+ // ppc port uses 0 but we definitely need to allow for fixed_slots -+ // which folds in the space used for monitors -+ return_addr(STACK - 2 + -+ align_up((Compile::current()->in_preserve_stack_slots() + -+ Compile::current()->fixed_slots()), -+ stack_alignment_in_slots())); ++ VMReg as_VMReg() const; + -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. 
++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { ++ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); ++ } + -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + -+ // Location of compiled Java return values. Same as C for now. -+ return_value -+ %{ -+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, -+ "only return normal values"); ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } + -+ static const int lo[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ R10_num, // Op_RegN -+ R10_num, // Op_RegI -+ R10_num, // Op_RegP -+ F10_num, // Op_RegF -+ F10_num, // Op_RegD -+ R10_num // Op_RegL -+ }; ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } ++}; + -+ static const int hi[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ OptoReg::Bad, // Op_RegN -+ OptoReg::Bad, // Op_RegI -+ R10_H_num, // Op_RegP -+ OptoReg::Bad, // Op_RegF -+ F10_H_num, // Op_RegD -+ R10_H_num // Op_RegL -+ }; ++REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); + -+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); -+ %} -+%} ++// The float registers of the RISCV architecture + -+//----------ATTRIBUTES--------------------------------------------------------- -+//----------Operand Attributes------------------------------------------------- -+op_attrib op_cost(1); // Required cost attribute ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + -+//----------Instruction Attributes--------------------------------------------- -+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute -+ins_attrib ins_size(32); // Required size attribute (in bits) -+ins_attrib ins_short_branch(0); // Required flag: is this instruction -+ // a non-matching short branch variant -+ // of some long branch? -+ins_attrib ins_alignment(4); // Required alignment attribute (must -+ // be a power of 2) specifies the -+ // alignment that some part of the -+ // instruction (not necessarily the -+ // start) requires. 
If > 1, a -+ // compute_padding() function must be -+ // provided for the instruction ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + -+//----------OPERANDS----------------------------------------------------------- -+// Operand definitions must precede instruction definitions for correct parsing -+// in the ADLC because operands constitute user defined types which are used in -+// instruction definitions. 
++// Use VectorRegister as shortcut ++class VectorRegisterImpl; ++typedef const VectorRegisterImpl* VectorRegister; + -+//----------Simple Operands---------------------------------------------------- ++inline constexpr VectorRegister as_VectorRegister(int encoding); + -+// Integer operands 32 bit -+// 32 bit immediate -+operand immI() -+%{ -+ match(ConI); ++// The implementation of vector registers for RVV ++class VectorRegisterImpl: public AbstractRegisterImpl { ++ static constexpr VectorRegister first(); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 4 ++ }; + -+// 32 bit zero -+operand immI0() -+%{ -+ predicate(n->get_int() == 0); -+ match(ConI); ++ // construction ++ inline friend constexpr VectorRegister as_VectorRegister(int encoding); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ VMReg as_VMReg() const; + -+// 32 bit unit increment -+operand immI_1() -+%{ -+ predicate(n->get_int() == 1); -+ match(ConI); ++ // derived registers, offsets, and addresses ++ VectorRegister successor() const { return this + 1; } + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+// 32 bit unit decrement -+operand immI_M1() -+%{ -+ predicate(n->get_int() == -1); -+ match(ConI); ++}; + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); + -+// Unsigned Integer Immediate: 6-bit int, greater than 32 -+operand uimmI6_ge32() %{ -+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// The vector registers of RVV ++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + -+operand immI_le_4() -+%{ -+ predicate(n->get_int() <= 4); -+ match(ConI); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); 
++CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} + -+operand immI_16() -+%{ -+ predicate(n->get_int() == 16); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. + -+operand immI_24() -+%{ -+ predicate(n->get_int() == 24); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + ++ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ }; + -+operand immI_31() -+%{ -+ predicate(n->get_int() == 31); -+ match(ConI); ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; ++ static const int max_vpr; ++}; + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++typedef AbstractRegSet RegSet; ++typedef AbstractRegSet FloatRegSet; ++typedef AbstractRegSet VectorRegSet; + -+operand immI_32() -+%{ -+ predicate(n->get_int() == 32); -+ match(ConI); ++#endif // CPU_RISCV_REGISTER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +new file mode 100644 +index 00000000000..228a64eae2c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" + -+operand immI_63() -+%{ -+ predicate(n->get_int() == 63); -+ match(ConI); ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ if (verify_only) { ++ return; ++ } + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ int bytes; + -+operand immI_64() -+%{ -+ predicate(n->get_int() == 64); -+ match(ConI); ++ switch (type()) { ++ case relocInfo::oop_type: { ++ oop_Relocation *reloc = (oop_Relocation *)this; ++ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate ++ if (NativeInstruction::is_load_pc_relative_at(addr())) { ++ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); ++ assert(*(address*)constptr == x, "error in oop relocation"); ++ } else { ++ bytes = MacroAssembler::patch_oop(addr(), x); ++ } ++ break; ++ } ++ default: ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); ++ break; ++ } ++ ICache::invalidate_range(addr(), bytes); ++} + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++address Relocation::pd_call_destination(address orig_addr) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } ++ } ++ if (orig_addr != NULL) { ++ // the extracted address from the instructions in address orig_addr ++ address new_addr = MacroAssembler::pd_call_destination(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; ++ return new_addr; ++ } ++ return MacroAssembler::pd_call_destination(addr()); ++} + -+// 32 bit integer valid for add immediate -+operand immIAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++void Relocation::pd_set_call_destination(address x) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); ++ return; ++ } ++ } ++ MacroAssembler::pd_patch_instruction_size(addr(), x); ++ address pd_call = pd_call_destination(addr()); ++ assert(pd_call == x, "fail in reloc"); ++} + -+// 32 bit integer valid for sub immediate -+operand immISub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++address* Relocation::pd_address_in_code() { ++ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); ++ return (address*)(MacroAssembler::target_addr_for_insn(addr())); ++} + -+// 5 bit signed value. -+operand immI5() -+%{ -+ predicate(n->get_int() <= 15 && n->get_int() >= -16); -+ match(ConI); ++address Relocation::pd_get_address_from_code() { ++ return MacroAssembler::pd_call_destination(addr()); ++} + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ if (NativeInstruction::maybe_cpool_ref(addr())) { ++ address old_addr = old_addr_for(addr(), src, dest); ++ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); ++ } ++} + -+// 5 bit signed value (simm5) -+operand immL5() -+%{ -+ predicate(n->get_long() <= 15 && n->get_long() >= -16); -+ match(ConL); ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +new file mode 100644 +index 00000000000..840ed935d88 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP ++#define CPU_RISCV_RELOCINFO_RISCV_HPP + -+// Integer operands 64 bit -+// 64 bit immediate -+operand immL() -+%{ -+ match(ConL); ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Relocations are byte-aligned. ++ offset_unit = 1, ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 ++ }; + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ public: + -+// 64 bit zero -+operand immL0() -+%{ -+ predicate(n->get_long() == 0); -+ match(ConL); ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. ++ static bool mustIterateImmediateOopsInCode() { return false; } + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++#endif // CPU_RISCV_RELOCINFO_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +new file mode 100644 +index 00000000000..588887e1d96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -0,0 +1,10611 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+// Pointer operands -+// Pointer Immediate -+operand immP() -+%{ -+ match(ConP); ++// RISCV Architecture Description File + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. + -+// NULL Pointer Immediate -+operand immP0() -+%{ -+ predicate(n->get_ptr() == 0); -+ match(ConP); ++register %{ ++//----------Architecture Description Register Definitions---------------------- ++// General Registers ++// "reg_def" name ( register save type, C convention save type, ++// ideal register type, encoding ); ++// Register Save Types: ++// ++// NS = No-Save: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, & ++// that they do not need to be saved at call sites. ++// ++// SOC = Save-On-Call: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, ++// but that they must be saved at call sites. 
++// ++// SOE = Save-On-Entry: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, but they do not need to be saved at call ++// sites. ++// ++// AS = Always-Save: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, & that they must be saved at call sites. ++// ++// Ideal Register Type is used to determine how to save & restore a ++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get ++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. ++// ++// The encoding number is the actual bit-pattern placed into the opcodes. + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// We must define the 64 bit int registers in two 32 bit halves, the ++// real lower register and a virtual upper half register. upper halves ++// are used by the register allocator but are not actually supplied as ++// operands to memory ops. ++// ++// follow the C1 compiler in making registers ++// ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) ++// x5-x6 non-allocatable (so we can use them as temporary regs) + -+// Pointer Immediate One -+// this is used in object initialization (initial object header) -+operand immP_1() -+%{ -+ predicate(n->get_ptr() == 1); -+ match(ConP); ++// ++// as regards Java usage. we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// General Registers + -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); ++reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr ++reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp ++reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); ++reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp ++reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); ++reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp ++reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); ++reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); ++reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp ++reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); ++reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); ++reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); ++reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); ++reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); ++reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); ++reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); ++reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); ++reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); ++reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); ++reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); ++reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); ++reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); ++reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); ++reg_def R15_H ( SOC, SOC, 
Op_RegI, 15, x15->as_VMReg()->next()); ++reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); ++reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); ++reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); ++reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); ++reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); ++reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); ++reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); ++reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); ++reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp ++reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); ++reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); ++reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); ++reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); ++reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); ++reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread ++reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); ++reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); ++reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); ++reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); ++reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); ++reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); ++reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); ++reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase ++reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); ++reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); ++reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); ++reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); ++reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); ++reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); ++reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); ++reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); ++reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// ---------------------------- ++// Float/Double Registers ++// ---------------------------- + -+// Card Table Byte Map Base -+operand immByteMapBase() -+%{ -+ // Get base of card map -+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ match(ConP); ++// Double Registers + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// The rules of ADL require that double registers be defined in pairs. ++// Each pair must be two 32-bit values, but not necessarily a pair of ++// single float registers. In each pair, ADLC-assigned register numbers ++// must be adjacent, with the lower number even. Finally, when the ++// CPU stores such a register pair to memory, the word associated with ++// the lower ADLC-assigned number must be stored to the lower address. + -+// Int Immediate: low 16-bit mask -+operand immI_16bits() -+%{ -+ predicate(n->get_int() == 0xFFFF); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// RISCV has 32 floating-point registers. Each can store a single ++// or double precision floating-point value. 
+ -+// Long Immediate: low 32-bit mask -+operand immL_32bits() -+%{ -+ predicate(n->get_long() == 0xFFFFFFFFL); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// for Java use float registers f0-f31 are always save on call whereas ++// the platform ABI treats f8-f9 and f18-f27 as callee save). Other ++// float registers are SOC as per the platform spec + -+// 64 bit unit decrement -+operand immL_M1() -+%{ -+ predicate(n->get_long() == -1); -+ match(ConL); ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); ++reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); ++reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); ++reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); ++reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); ++reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); ++reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); ++reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); ++reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); ++reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); ++reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); ++reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); ++reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); ++reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); ++reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); ++reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); ++reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); ++reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); ++reg_def F24_H ( SOC, SOE, Op_RegF, 
24, f24->as_VMReg()->next() ); ++reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); ++reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); ++reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); ++reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); ++reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); ++reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// ---------------------------- ++// Vector Registers ++// ---------------------------- + ++// For RVV vector registers, we simply extend vector register size to 4 ++// 'logical' slots. This is nominally 128 bits but it actually covers ++// all possible 'physical' RVV vector register lengths from 128 ~ 1024 ++// bits. The 'physical' RVV vector register length is detected during ++// startup, so the register allocator is able to identify the correct ++// number of bytes needed for an RVV spill/unspill. + -+// 32 bit offset of pc in thread anchor ++reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); ++reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); ++reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); ++reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); + -+operand immL_pc_off() -+%{ -+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + -+ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); -+ match(ConL); ++reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); ++reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); ++reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); ++reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); ++reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); ++reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); ++reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); + -+// 64 bit integer valid for add immediate -+operand immLAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); ++reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); ++reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); ++reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); + -+// 64 bit integer valid for sub immediate -+operand immLSub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); ++reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); ++reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); ++reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); + -+// Narrow pointer operands -+// Narrow Pointer Immediate -+operand 
immN() -+%{ -+ match(ConN); ++reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); ++reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); ++reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); ++reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); ++reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); ++reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); ++reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); + -+// Narrow NULL Pointer Immediate -+operand immN0() -+%{ -+ predicate(n->get_narrowcon() == 0); -+ match(ConN); ++reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); ++reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); ++reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); ++reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); ++reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); ++reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); ++reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); + -+operand immNKlass() -+%{ -+ match(ConNKlass); ++reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); ++reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); ++reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); ++reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); ++reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); ++reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); ++reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); + -+// Float and Double operands -+// Double Immediate -+operand immD() -+%{ -+ match(ConD); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); ++reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); ++reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); ++reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); + -+// Double Immediate: +0.0d -+operand immD0() -+%{ -+ predicate(jlong_cast(n->getd()) == 0); -+ match(ConD); ++reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); ++reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); ++reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); ++reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); ++reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); ++reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); ++reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); + -+// Float Immediate -+operand immF() -+%{ -+ match(ConF); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); ++reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); ++reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); ++reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); + -+// Float Immediate: +0.0f. 
-+operand immF0() -+%{ -+ predicate(jint_cast(n->getf()) == 0); -+ match(ConF); ++reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); ++reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); ++reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); ++reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); ++reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); ++reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); ++reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); + -+operand immIOffset() -+%{ -+ predicate(is_imm_in_range(n->get_int(), 12, 0)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); ++reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); ++reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); ++reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); + -+operand immLOffset() -+%{ -+ predicate(is_imm_in_range(n->get_long(), 12, 0)); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); ++reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); ++reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); ++reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); + -+// Scale values -+operand immIScale() -+%{ -+ predicate(1 <= n->get_int() && (n->get_int() <= 3)); -+ match(ConI); ++reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); ++reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); ++reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); ++reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); ++reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); ++reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); ++reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); + -+// Integer 32 bit Register Operands -+operand iRegI() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); ++reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); ++reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); ++reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); + -+// Integer 32 bit Register not Special -+operand iRegINoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); ++reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); ++reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); ++reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); + -+// Register R10 only -+operand iRegI_R10() -+%{ -+ constraint(ALLOC_IN_RC(int_r10_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); ++reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); ++reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); ++reg_def 
V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); + -+// Register R12 only -+operand iRegI_R12() -+%{ -+ constraint(ALLOC_IN_RC(int_r12_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); ++reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); ++reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); ++reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); + -+// Register R13 only -+operand iRegI_R13() -+%{ -+ constraint(ALLOC_IN_RC(int_r13_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); ++reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); ++reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); ++reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); + -+// Register R14 only -+operand iRegI_R14() -+%{ -+ constraint(ALLOC_IN_RC(int_r14_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); ++reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); ++reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); ++reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); + -+// Integer 64 bit Register Operands -+operand iRegL() -+%{ -+ constraint(ALLOC_IN_RC(any_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); ++reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); ++reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); ++reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); + -+// Integer 64 bit Register not Special -+operand iRegLNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg)); -+ match(RegL); -+ match(iRegL_R10); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); ++reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); ++reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); ++reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); + -+// Long 64 bit Register R28 only -+operand iRegL_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); ++reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); ++reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); ++reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); + -+// Long 64 bit Register R29 only -+operand iRegL_R29() -+%{ -+ constraint(ALLOC_IN_RC(r29_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); ++reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); ++reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); ++reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); + -+// Long 64 bit Register R30 only -+operand iRegL_R30() -+%{ -+ constraint(ALLOC_IN_RC(r30_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); ++reg_def V31_H ( SOC, 
SOC, Op_VecA, 31, v31->as_VMReg()->next() ); ++reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); ++reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); + -+// Pointer Register Operands -+// Pointer Register -+operand iRegP() -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ match(iRegP_R10); -+ match(javaThread_RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// ---------------------------- ++// Special Registers ++// ---------------------------- + -+// Pointer 64 bit Register not Special -+operand iRegPNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_ptr_reg)); -+ match(RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// On riscv, the physical flag register is missing, so we use t1 instead, ++// to bridge the RegFlag semantics in share/opto + -+operand iRegP_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); + -+// Pointer 64 bit Register R11 only -+operand iRegP_R11() -+%{ -+ constraint(ALLOC_IN_RC(r11_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Specify priority of register selection within phases of register ++// allocation. Highest priority is first. A useful heuristic is to ++// give registers a low priority when they are required by machine ++// instructions, like EAX and EDX on I486, and choose no-save registers ++// before save-on-call, & save-on-call before save-on-entry. Registers ++// which participate in fixed calling sequences should come last. ++// Registers which are used as pairs must fall on an even boundary. 
+ -+operand iRegP_R12() -+%{ -+ constraint(ALLOC_IN_RC(r12_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk0( ++ // volatiles ++ R7, R7_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H, + -+// Pointer 64 bit Register R13 only -+operand iRegP_R13() -+%{ -+ constraint(ALLOC_IN_RC(r13_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // arg registers ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, + -+operand iRegP_R14() -+%{ -+ constraint(ALLOC_IN_RC(r14_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // non-volatiles ++ R9, R9_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, + -+operand iRegP_R15() -+%{ -+ constraint(ALLOC_IN_RC(r15_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // non-allocatable registers ++ R23, R23_H, // java thread ++ R27, R27_H, // heapbase ++ R4, R4_H, // thread ++ R8, R8_H, // fp ++ R0, R0_H, // zero ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++); + -+operand iRegP_R16() -+%{ -+ constraint(ALLOC_IN_RC(r16_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk1( + -+// Pointer 64 bit Register R28 only -+operand iRegP_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // no save ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H, + -+// Pointer Register Operands -+// Narrow Pointer Register -+operand iRegN() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegN); -+ match(iRegNNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // arg registers ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, + -+// Integer 64 bit Register not Special -+operand iRegNNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegN); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // non-volatiles ++ F8, F8_H, ++ F9, F9_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++); + -+// heap base register -- used for encoding immN0 -+operand iRegIHeapbase() -+%{ -+ constraint(ALLOC_IN_RC(heapbase_reg)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk2( ++ V0, V0_H, V0_J, V0_K, ++ V1, V1_H, V1_J, V1_K, ++ V2, V2_H, V2_J, V2_K, ++ V3, V3_H, V3_J, V3_K, ++ V4, V4_H, V4_J, V4_K, ++ V5, V5_H, V5_J, V5_K, ++ V6, V6_H, V6_J, V6_K, ++ V7, V7_H, V7_J, V7_K, ++ V8, V8_H, V8_J, V8_K, ++ V9, V9_H, V9_J, V9_K, ++ V10, V10_H, V10_J, V10_K, ++ V11, V11_H, V11_J, V11_K, ++ V12, V12_H, V12_J, V12_K, ++ V13, V13_H, V13_J, V13_K, ++ V14, V14_H, V14_J, V14_K, ++ V15, V15_H, V15_J, V15_K, ++ V16, V16_H, V16_J, V16_K, ++ V17, V17_H, V17_J, V17_K, ++ V18, V18_H, V18_J, V18_K, ++ V19, V19_H, V19_J, V19_K, ++ V20, V20_H, V20_J, V20_K, ++ V21, V21_H, V21_J, V21_K, ++ V22, V22_H, 
V22_J, V22_K, ++ V23, V23_H, V23_J, V23_K, ++ V24, V24_H, V24_J, V24_K, ++ V25, V25_H, V25_J, V25_K, ++ V26, V26_H, V26_J, V26_K, ++ V27, V27_H, V27_J, V27_K, ++ V28, V28_H, V28_J, V28_K, ++ V29, V29_H, V29_J, V29_K, ++ V30, V30_H, V30_J, V30_K, ++ V31, V31_H, V31_J, V31_K, ++); + -+// Long 64 bit Register R10 only -+operand iRegL_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk3(RFLAGS); + -+// Float Register -+// Float register operands -+operand fRegF() -+%{ -+ constraint(ALLOC_IN_RC(float_reg)); -+ match(RegF); ++//----------Architecture Description Register Classes-------------------------- ++// Several register classes are automatically defined based upon information in ++// this architecture description. ++// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) ++// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Class for all 32 bit general purpose registers ++reg_class all_reg32( ++ R0, ++ R1, ++ R2, ++ R3, ++ R4, ++ R7, ++ R8, ++ R9, ++ R10, ++ R11, ++ R12, ++ R13, ++ R14, ++ R15, ++ R16, ++ R17, ++ R18, ++ R19, ++ R20, ++ R21, ++ R22, ++ R23, ++ R24, ++ R25, ++ R26, ++ R27, ++ R28, ++ R29, ++ R30, ++ R31 ++); ++ ++// Class for any 32 bit integer registers (excluding zr) ++reg_class any_reg32 %{ ++ return _ANY_REG32_mask; +%} + -+// Double Register -+// Double register operands -+operand fRegD() -+%{ -+ constraint(ALLOC_IN_RC(double_reg)); -+ match(RegD); ++// Singleton class for R10 int register ++reg_class int_r10_reg(R10); + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Singleton class for R12 int register ++reg_class int_r12_reg(R12); + -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Singleton class for R13 int register ++reg_class int_r13_reg(R13); + -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Singleton class for R14 int register ++reg_class int_r14_reg(R14); + -+operand vReg_V2() -+%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Class for all long integer registers ++reg_class all_reg( ++ R0, R0_H, ++ R1, R1_H, ++ R2, R2_H, ++ R3, R3_H, ++ R4, R4_H, ++ R7, R7_H, ++ R8, R8_H, ++ R9, R9_H, ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R23, R23_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ R27, R27_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H ++); + -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Class for all long integer registers (excluding zr) ++reg_class any_reg %{ ++ return _ANY_REG_mask; +%} + -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Class for non-allocatable 32 bit registers ++reg_class non_allocatable_reg32( ++ R0, // zr ++ R1, // ra ++ R2, // sp ++ R3, // gp ++ R4, // tp ++ R23 // java thread ++); + -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Class for non-allocatable 64 bit registers ++reg_class non_allocatable_reg( ++ R0, R0_H, // zr ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++ R4, R4_H, // tp ++ R23, R23_H // java thread ++); ++ ++reg_class no_special_reg32 %{ ++ return _NO_SPECIAL_REG32_mask; +%} + -+// Java Thread Register -+operand javaThread_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg -+ match(reg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++reg_class no_special_reg %{ ++ return _NO_SPECIAL_REG_mask; +%} + -+//----------Memory Operands---------------------------------------------------- -+// RISCV has only base_plus_offset and literal address mode, so no need to use -+// index and scale. Here set index as 0xffffffff and scale as 0x0. 
-+operand indirect(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(reg); -+ op_cost(0); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} ++reg_class ptr_reg %{ ++ return _PTR_REG_mask; +%} + -+operand indOffI(iRegP reg, immIOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} ++reg_class no_special_ptr_reg %{ ++ return _NO_SPECIAL_PTR_REG_mask; +%} + -+operand indOffL(iRegP reg, immLOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r10 ++reg_class r10_reg( ++ R10, R10_H ++); + -+operand indirectN(iRegN reg) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(DecodeN reg); -+ op_cost(0); -+ format %{ "[$reg]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} -+%} ++// Class for 64 bit register r11 ++reg_class r11_reg( ++ R11, R11_H ++); + -+operand indOffIN(iRegN reg, immIOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r12 ++reg_class r12_reg( ++ R12, R12_H ++); + -+operand indOffLN(iRegN reg, immLOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r13 ++reg_class r13_reg( ++ R13, R13_H ++); + -+// RISCV opto stubs need to write to the pc slot in the thread anchor -+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r14 ++reg_class r14_reg( ++ R14, R14_H ++); + ++// Class for 64 bit register r15 ++reg_class r15_reg( ++ R15, R15_H ++); + -+//----------Special Memory Operands-------------------------------------------- -+// Stack Slot Operand - This operand is used for loading and storing temporary -+// values on the stack where a match requires a value to -+// flow through memory. 
-+operand stackSlotI(sRegI reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegI); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for 64 bit register r16 ++reg_class r16_reg( ++ R16, R16_H ++); + -+operand stackSlotF(sRegF reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegF); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for method register ++reg_class method_reg( ++ R31, R31_H ++); + -+operand stackSlotD(sRegD reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegD); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for heapbase register ++reg_class heapbase_reg( ++ R27, R27_H ++); + -+operand stackSlotL(sRegL reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegL); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for java thread register ++reg_class java_thread_reg( ++ R23, R23_H ++); + -+// Special operand allowing long args to int ops to be truncated for free ++reg_class r28_reg( ++ R28, R28_H ++); + -+operand iRegL2I(iRegL reg) %{ ++reg_class r29_reg( ++ R29, R29_H ++); + -+ op_cost(0); ++reg_class r30_reg( ++ R30, R30_H ++); + -+ match(ConvL2I reg); ++// Class for zero register ++reg_class zr_reg( ++ R0, R0_H ++); + -+ format %{ "l2i($reg)" %} ++// Class for thread register ++reg_class thread_reg( ++ R4, R4_H ++); + -+ interface(REG_INTER) -+%} ++// Class for frame pointer register ++reg_class fp_reg( ++ R8, R8_H ++); + ++// Class for link register ++reg_class ra_reg( ++ R1, R1_H ++); + -+// Comparison Operands -+// NOTE: Label is a predefined operand which should not be redefined in -+// the AD file. It is generically handled within the ADLC. ++// Class for long sp register ++reg_class sp_reg( ++ R2, R2_H ++); + -+//----------Conditional Branch Operands---------------------------------------- -+// Comparison Op - This is the operation of the comparison, and is limited to -+// the following set of codes: -+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -+// -+// Other attributes of the comparison, such as unsignedness, are specified -+// by the comparison instruction that sets a condition code flags register. -+// That result is represented by a flags operand whose subtype is appropriate -+// to the unsignedness (etc.) of the comparison. -+// -+// Later, the instruction which matches both the Comparison Op (a Bool) and -+// the flags (produced by the Cmp) specifies the coding of the comparison op -+// by matching a specific subtype of Bool operand below, such as cmpOpU.
++// Class for all float registers ++reg_class float_reg( ++ F0, ++ F1, ++ F2, ++ F3, ++ F4, ++ F5, ++ F6, ++ F7, ++ F8, ++ F9, ++ F10, ++ F11, ++ F12, ++ F13, ++ F14, ++ F15, ++ F16, ++ F17, ++ F18, ++ F19, ++ F20, ++ F21, ++ F22, ++ F23, ++ F24, ++ F25, ++ F26, ++ F27, ++ F28, ++ F29, ++ F30, ++ F31 ++); + ++// Double precision float registers have virtual `high halves' that ++// are needed by the allocator. ++// Class for all double registers ++reg_class double_reg( ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H ++); + -+// used for signed integral comparisons and fp comparisons -+operand cmpOp() -+%{ -+ match(Bool); ++// Class for all RVV vector registers ++reg_class vectora_reg( ++ V1, V1_H, V1_J, V1_K, ++ V2, V2_H, V2_J, V2_K, ++ V3, V3_H, V3_J, V3_K, ++ V4, V4_H, V4_J, V4_K, ++ V5, V5_H, V5_J, V5_K, ++ V6, V6_H, V6_J, V6_K, ++ V7, V7_H, V7_J, V7_K, ++ V8, V8_H, V8_J, V8_K, ++ V9, V9_H, V9_J, V9_K, ++ V10, V10_H, V10_J, V10_K, ++ V11, V11_H, V11_J, V11_K, ++ V12, V12_H, V12_J, V12_K, ++ V13, V13_H, V13_J, V13_K, ++ V14, V14_H, V14_J, V14_K, ++ V15, V15_H, V15_J, V15_K, ++ V16, V16_H, V16_J, V16_K, ++ V17, V17_H, V17_J, V17_K, ++ V18, V18_H, V18_J, V18_K, ++ V19, V19_H, V19_J, V19_K, ++ V20, V20_H, V20_J, V20_K, ++ V21, V21_H, V21_J, V21_K, ++ V22, V22_H, V22_J, V22_K, ++ V23, V23_H, V23_J, V23_K, ++ V24, V24_H, V24_J, V24_K, ++ V25, V25_H, V25_J, V25_K, ++ V26, V26_H, V26_J, V26_K, ++ V27, V27_H, V27_J, V27_K, ++ V28, V28_H, V28_J, V28_K, ++ V29, V29_H, V29_J, V29_K, ++ V30, V30_H, V30_J, V30_K, ++ V31, V31_H, V31_J, V31_K ++); + -+ format %{ "" %} ++// Class for 64 bit register f0 ++reg_class f0_reg( ++ F0, F0_H ++); + -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++// Class for 64 bit register f1 ++reg_class f1_reg( ++ F1, F1_H ++); + -+// used for unsigned integral comparisons -+operand cmpOpU() -+%{ -+ match(Bool); ++// Class for 64 bit register f2 ++reg_class f2_reg( ++ F2, F2_H ++); + -+ format %{ "" %} -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gtu"); -+ overflow(0x2, "overflow"); -+ less(0x3, "ltu"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "leu"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "geu"); -+ %} -+%} ++// Class for 64 bit register f3 ++reg_class f3_reg( ++ F3, F3_H ++); + -+// used for certain integral comparisons which can be -+// converted to bxx instructions -+operand cmpOpEqNe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq); ++// class for vector register v1 ++reg_class v1_reg( ++ V1, V1_H, V1_J, V1_K ++); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ 
no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++// class for vector register v2 ++reg_class v2_reg( ++ V2, V2_H, V2_J, V2_K ++); + -+operand cmpOpULtGe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::lt || -+ n->as_Bool()->_test._test == BoolTest::ge); ++// class for vector register v3 ++reg_class v3_reg( ++ V3, V3_H, V3_J, V3_K ++); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++// class for vector register v4 ++reg_class v4_reg( ++ V4, V4_H, V4_J, V4_K ++); + -+operand cmpOpUEqNeLeGt() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq || -+ n->as_Bool()->_test._test == BoolTest::le || -+ n->as_Bool()->_test._test == BoolTest::gt); ++// class for vector register v5 ++reg_class v5_reg( ++ V5, V5_H, V5_J, V5_K ++); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} ++// class for condition codes ++reg_class reg_flags(RFLAGS); +%} + ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def <name> ( <int_value>, <expression>); ++// Generated Code in ad_<arch>.hpp ++// #define <name> (<expression>) ++// // value == <int_value> ++// Generated code in ad_<arch>.cpp adlc_verification() ++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); ++// + -+// Flags register, used as output of compare logic -+operand rFlagsReg() -+%{ -+ constraint(ALLOC_IN_RC(reg_flags)); -+ match(RegFlags); ++// we follow the ppc-aix port in using a simple cost model which ranks ++// register operations as cheap, memory ops as more expensive and ++// branches as most expensive. the first two have a low as well as a ++// normal cost. huge cost appears to be a way of saying don't do ++// something + -+ op_cost(0); -+ format %{ "RFLAGS" %} -+ interface(REG_INTER); ++definitions %{ ++ // The default cost (of a register move instruction).
++ int_def DEFAULT_COST ( 100, 100); ++ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, ++ // multi, auipc, nop, logical, move ++ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload ++ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore ++ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp ++ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call ++ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul ++ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi ++ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi ++ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv ++ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); +%} + -+// Special Registers + -+// Method Register -+operand inline_cache_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg -+ match(reg); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} + -+//----------OPERAND CLASSES---------------------------------------------------- -+// Operand Classes are groups of operands that are used as to simplify -+// instruction definitions by not requiring the AD writer to specify -+// separate instructions for every form of operand when the -+// instruction accepts multiple operand types with the same basic -+// encoding and format. The classic case of this is memory operands. ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description + -+// memory is used to define read/write location for load/store -+// instruction defs. we can turn a memory op into an Address ++source_hpp %{ + -+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); -+ -+// iRegIorL2I is used for src inputs in rules for 32 bit int (I) -+// operations. it allows the src to be either an iRegI or a (ConvL2I -+// iRegL). in the latter case the l2i normally planted for a ConvL2I -+// can be elided because the 32-bit instruction will just employ the -+// lower 32 bits anyway. -+// -+// n.b. this does not elide all L2I conversions. if the truncated -+// value is consumed by more than one operation then the ConvL2I -+// cannot be bundled into the consuming nodes so an l2i gets planted -+// (actually a mvw $dst $src) and the downstream instructions consume -+// the result of the l2i as an iRegI input. That's a shame since the -+// mvw is actually redundant but its not too costly. 
++#include "asm/macroAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" + -+opclass iRegIorL2I(iRegI, iRegL2I); -+opclass iRegIorL(iRegI, iRegL); -+opclass iRegNorP(iRegN, iRegP); -+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); -+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); -+opclass immIorL(immI, immL); ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++extern RegMask _NO_SPECIAL_REG32_mask; ++extern RegMask _NO_SPECIAL_REG_mask; ++extern RegMask _NO_SPECIAL_PTR_REG_mask; + -+//----------PIPELINE----------------------------------------------------------- -+// Rules which define the behavior of the target architectures pipeline. ++class CallStubImpl { + -+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline -+//pipe_desc(ID, EX, MEM, WR); -+#define ID S0 -+#define EX S1 -+#define MEM S2 -+#define WR S3 ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- + -+// Integer ALU reg operation -+pipeline %{ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } + -+attributes %{ -+ // RISC-V instructions are of fixed length -+ fixed_size_instructions; // Fixed size instructions TODO does -+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 -+ // RISC-V instructions come in 32-bit word units -+ instruction_unit_size = 4; // An instruction is 4 bytes long -+ instruction_fetch_unit_size = 64; // The processor fetches one line -+ instruction_fetch_units = 1; // of 64 bytes ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; + -+ // List of nop instructions -+ nops( MachNop ); -+%} ++class HandlerImpl { + -+// We don't use an actual pipeline model so don't care about resources -+// or description. 
we do use pipeline classes to introduce fixed -+// latencies ++ public: + -+//----------RESOURCES---------------------------------------------------------- -+// Resources are the functional units available to the machine ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); + -+// Generic RISC-V pipeline -+// 1 decoder -+// 1 instruction decoded per cycle -+// 1 load/store ops per cycle, 1 branch, 1 FPU -+// 1 mul, 1 div ++ static uint size_exception_handler() { ++ return MacroAssembler::far_branch_size(); ++ } + -+resources ( DECODE, -+ ALU, -+ MUL, -+ DIV, -+ BRANCH, -+ LDST, -+ FPU); ++ static uint size_deopt_handler() { ++ // count auipc + far branch ++ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ } ++}; + -+//----------PIPELINE DESCRIPTION----------------------------------------------- -+// Pipeline Description specifies the stages in the machine's pipeline ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; + -+// Define the pipeline as a generic 6 stage pipeline -+pipe_desc(S0, S1, S2, S3, S4, S5); ++bool is_CAS(int opcode, bool maybe_volatile); + -+//----------PIPELINE CLASSES--------------------------------------------------- -+// Pipeline Classes describe the stages in which input and output are -+// referenced by the hardware pipeline. ++// predicate controlling translation of CompareAndSwapX ++bool needs_acquiring_load_reserved(const Node *load); + -+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); +%} + -+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++source %{ + -+pipe_class fp_uop_s(fRegF dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// Derived RegMask with conditionally allocatable registers + -+pipe_class fp_uop_d(fRegD dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+pipe_class fp_d2f(fRegF dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++void reg_mask_init() { + -+pipe_class fp_f2d(fRegD dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + -+pipe_class fp_f2i(iRegINoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+pipe_class fp_f2l(iRegLNoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+pipe_class fp_i2f(fRegF dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ 
_NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+pipe_class fp_l2f(fRegF dst, iRegL src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+pipe_class fp_d2i(iRegINoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+pipe_class fp_d2l(iRegLNoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ } + -+pipe_class fp_i2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} + -+pipe_class fp_l2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} + -+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++int MachNode::pd_alignment_required() const { ++ return 1; ++} + -+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++int MachNode::compute_padding(int current_offset) const { ++ return 0; ++} + -+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// is_CAS(int opcode, bool maybe_volatile) ++// ++// return true if opcode is one of the possible CompareAndSwapX ++// values otherwise false. 
++bool is_CAS(int opcode, bool maybe_volatile) ++{ ++ switch (opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++ case Op_CompareAndSwapB: ++ case Op_CompareAndSwapS: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ case Op_CompareAndExchangeI: ++ case Op_CompareAndExchangeN: ++ case Op_CompareAndExchangeB: ++ case Op_CompareAndExchangeS: ++ case Op_CompareAndExchangeL: ++ case Op_CompareAndExchangeP: ++ case Op_WeakCompareAndSwapB: ++ case Op_WeakCompareAndSwapS: ++ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapL: ++ case Op_WeakCompareAndSwapP: ++ case Op_WeakCompareAndSwapN: ++ case Op_ShenandoahWeakCompareAndSwapP: ++ case Op_ShenandoahWeakCompareAndSwapN: ++ case Op_ShenandoahCompareAndExchangeP: ++ case Op_ShenandoahCompareAndExchangeN: ++ return maybe_volatile; ++ default: ++ return false; ++ } ++} + -+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// predicate controlling translation of CAS ++// ++// returns true if CAS needs to use an acquiring load otherwise false ++bool needs_acquiring_load_reserved(const Node *n) ++{ ++ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); + -+pipe_class fp_load_constant_s(fRegF dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ LoadStoreNode* ldst = n->as_LoadStore(); ++ if (n != NULL && is_CAS(n->Opcode(), false)) { ++ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); ++ } else { ++ return ldst != NULL && ldst->trailing_membar() != NULL; ++ } ++ // so we can just return true here ++ return true; ++} ++#define __ _masm. + -+pipe_class fp_load_constant_d(fRegD dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// advance declarations for helper functions to convert register ++// indices to register objects + -+pipe_class fp_load_mem_s(fRegF dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++// the ad file has to provide implementations of certain methods ++// expected by the generic code ++// ++// REQUIRED FUNCTIONALITY + -+pipe_class fp_load_mem_d(fRegD dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++//============================================================================= + -+pipe_class fp_store_reg_s(fRegF src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
+ -+pipe_class fp_store_reg_d(fRegD src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++ // jal ++ return 1 * NativeInstruction::instruction_size; ++} + -+//------- Integer ALU operations -------------------------- ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ return 7 * NativeInstruction::instruction_size; // movptr, jal ++} + -+// Integer ALU reg-reg operation -+// Operands needs in ID, result generated in EX -+// E.g. ADD Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++int MachCallRuntimeNode::ret_addr_offset() { ++ // for generated stubs the call will be ++ // jal(addr) ++ // or with far branches ++ // jal(trampoline_stub) ++ // for real runtime callouts it will be 11 instructions ++ // see riscv_enc_java_to_runtime ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr ++ CodeBlob *cb = CodeCache::find_blob(_entry_point); ++ if (cb != NULL) { ++ return 1 * NativeInstruction::instruction_size; ++ } else { ++ return 11 * NativeInstruction::instruction_size; ++ } ++} + -+// Integer ALU reg operation with constant shift -+// E.g. SLLI Rd, Rs1, #shift -+pipe_class ialu_reg_shift(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} + -+// Integer ALU reg-reg operation with variable shift -+// both operands must be available in ID -+// E.g. SLL Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++// ++// Compute padding required for nodes which need alignment ++// + -+// Integer ALU reg operation -+// E.g. NEG Rd, Rs2 -+pipe_class ialu_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// Integer ALU reg immediate operation -+// E.g. ADDI Rd, Rs1, #imm -+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. 
++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// Integer ALU immediate operation (no source operands) -+// E.g. LI Rd, #imm -+pipe_class ialu_imm(iRegI dst) -+%{ -+ single_instruction; -+ dst : EX(write); -+ DECODE : ID; -+ ALU : EX; -+%} ++//============================================================================= + -+//------- Multiply pipeline operations -------------------- ++#ifndef PRODUCT ++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL); ++ st->print("BREAKPOINT"); ++} ++#endif + -+// Multiply reg-reg -+// E.g. MULW Rd, Rs1, Rs2 -+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ebreak(); ++} + -+// E.g. MUL RD, Rs1, Rs2 -+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(3); // Maximum latency for 64 bit mul -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+//------- Divide pipeline operations -------------------- ++//============================================================================= + -+// E.g. DIVW Rd, Rs1, Rs2 -+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(8); // Maximum latency for 32 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} -+ -+// E.g. DIV RD, Rs1, Rs2 -+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(16); // Maximum latency for 64 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} -+ -+//------- Load pipeline operations ------------------------ -+ -+// Load - reg, mem -+// E.g. LA Rd, mem -+pipe_class iload_reg_mem(iRegI dst, memory mem) -+%{ -+ single_instruction; -+ dst : WR(write); -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Load - reg, reg -+// E.g. LD Rd, Rs -+pipe_class iload_reg_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : WR(write); -+ src : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+//------- Store pipeline operations ----------------------- -+ -+// Store - zr, mem -+// E.g. SD zr, mem -+pipe_class istore_mem(memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Store - reg, mem -+// E.g. SD Rs, mem -+pipe_class istore_reg_mem(iRegI src, memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Store - reg, reg -+// E.g. 
SD Rs2, Rs1 -+pipe_class istore_reg_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+//------- Store pipeline operations ----------------------- -+ -+// Branch -+pipe_class pipe_branch() -+%{ -+ single_instruction; -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+// Branch -+pipe_class pipe_branch_reg(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+// Compare & Branch -+// E.g. BEQ Rs1, Rs2, L -+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+// E.g. BEQZ Rs, L -+pipe_class pipe_cmpz_branch(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+//------- Synchronisation operations ---------------------- -+// Any operation requiring serialization -+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release -+pipe_class pipe_serial() -+%{ -+ single_instruction; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+pipe_class pipe_slow() -+%{ -+ instruction_count(10); -+ multiple_bundles; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Empty pipeline class -+pipe_class pipe_class_empty() -+%{ -+ single_instruction; -+ fixed_latency(0); -+%} -+ -+// Default pipeline class. -+pipe_class pipe_class_default() -+%{ -+ single_instruction; -+ fixed_latency(2); -+%} -+ -+// Pipeline class for compares. -+pipe_class pipe_class_compare() -+%{ -+ single_instruction; -+ fixed_latency(16); -+%} -+ -+// Pipeline class for memory operations. -+pipe_class pipe_class_memory() -+%{ -+ single_instruction; -+ fixed_latency(16); -+%} ++#ifndef PRODUCT ++ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("nop \t# %d bytes pad for loops and calls", _count); ++ } ++#endif + -+// Pipeline class for call. -+pipe_class pipe_class_call() -+%{ -+ single_instruction; -+ fixed_latency(100); -+%} ++ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. ++ for (int i = 0; i < _count; i++) { ++ __ nop(); ++ } ++ } + -+// Define the class for the Nop node. -+define %{ -+ MachNop = pipe_class_empty; -+%} -+%} -+//----------INSTRUCTIONS------------------------------------------------------- -+// -+// match -- States which machine-independent subtree may be replaced -+// by this instruction. -+// ins_cost -- The estimated cost of this instruction is used by instruction -+// selection to identify a minimum cost tree of machine -+// instructions that matches a tree of machine-independent -+// instructions. -+// format -- A string providing the disassembly for this instruction. -+// The value of an instruction's operand may be inserted -+// by referring to it with a '$' prefix. -+// opcode -- Three instruction opcodes may be provided. These are referred -+// to within an encode class as $primary, $secondary, and $tertiary -+// rrspectively. The primary opcode is commonly used to -+// indicate the type of machine instruction, while secondary -+// and tertiary are often used for prefix options or addressing -+// modes. -+// ins_encode -- A list of encode classes with parameters. 
The encode class -+// name must have been defined in an 'enc_class' specification -+// in the encode section of the architecture description. ++ uint MachNopNode::size(PhaseRegAlloc*) const { ++ return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); ++ } + -+// ============================================================================ -+// Memory (Load/Store) Instructions ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + -+// Load Instructions ++int ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} + -+// Load Byte (8 bit signed) -+instruct loadB(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadB mem)); ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB" %} ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++} + -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} + -+ ins_pipe(iload_reg_mem); -+%} ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ assert_cond(st != NULL); ++ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++} ++#endif + -+// Load Byte (8 bit signed) into long -+instruct loadB2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadB mem))); ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} ++ int framesize = C->output()->frame_slots() << LogBytesPerInt; + -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (C->output()->need_stack_bang(framesize)) { ++ st->print("# stack bang size=%d\n\t", framesize); ++ } + -+ ins_pipe(iload_reg_mem); -+%} ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } ++ st->print("sub sp, sp, #%d\n\t", framesize); + -+// Load Byte (8 bit unsigned) -+instruct loadUB(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadUB mem)); ++ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { ++ st->print("ld t0, [guard]\n\t"); ++ st->print("membar LoadLoad\n\t"); ++ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); ++ st->print("beq t0, t1, skip\n\t"); ++ st->print("jalr #nmethod_entry_barrier_stub\n\t"); ++ st->print("j skip\n\t"); ++ st->print("guard: int\n\t"); ++ st->print("skip:\n\t"); ++ } ++} ++#endif + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); + -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // n.b. 
frame size includes space for return pc and fp ++ const int framesize = C->output()->frame_size_in_bytes(); + -+ ins_pipe(iload_reg_mem); -+%} ++ // insert a nop at the start of the prolog so we can patch in a ++ // branch if we need to invalidate the method later ++ __ nop(); + -+// Load Byte (8 bit unsigned) into long -+instruct loadUB2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadUB mem))); ++ assert_cond(C != NULL); + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ Label L_skip_barrier; + -+ ins_pipe(iload_reg_mem); -+%} ++ __ mov_metadata(t1, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(L_skip_barrier); ++ } + -+// Load Short (16 bit signed) -+instruct loadS(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadS mem)); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ if (C->output()->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ __ build_frame(framesize); + -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (C->stub_function() == NULL) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(&_masm); ++ } + -+ ins_pipe(iload_reg_mem); -+%} ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++ } + -+// Load Short (16 bit signed) into long -+instruct loadS2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadS mem))); ++ C->output()->set_frame_complete(cbuf.insts_size()); + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
++ ConstantTable& constant_table = C->output()->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} + -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++uint MachPrologNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} + -+ ins_pipe(iload_reg_mem); -+%} ++int MachPrologNode::reloc() const ++{ ++ return 0; ++} + -+// Load Char (16 bit unsigned) -+instruct loadUS(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadUS mem)); ++//============================================================================= + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++#ifndef PRODUCT ++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ assert_cond(C != NULL); ++ int framesize = C->output()->frame_size_in_bytes(); + -+ ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ st->print("# pop frame %d\n\t", framesize); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (framesize == 0) { ++ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); ++ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); ++ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); ++ } else { ++ st->print("add sp, sp, #%d\n\t", framesize); ++ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); ++ st->print("ld fp, [sp,#%d]\n\t", - wordSize); ++ } + -+// Load Short/Char (16 bit unsigned) into long -+instruct loadUS2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadUS mem))); ++ if (do_polling() && C->is_method_compilation()) { ++ st->print("# test polling word\n\t"); ++ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); ++ st->print("bgtu sp, t0, #slow_path"); ++ } ++} ++#endif + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ assert_cond(C != NULL); ++ int framesize = C->output()->frame_size_in_bytes(); + -+ ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ __ remove_frame(framesize); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } + -+// Load Integer (32 bit signed) -+instruct loadI(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadI mem)); ++ if (do_polling() && C->is_method_compilation()) { ++ Label dummy_label; ++ Label* code_stub = &dummy_label; ++ if (!C->output()->in_scratch_emit_size()) { ++ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ } ++} + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ // Variable size. Determine dynamically. ++ return MachNode::size(ra_); ++} + -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++int MachEpilogNode::reloc() const { ++ // Return number of relocatable values contained in this instruction. 
++ return 1; // 1 for polling page. ++} ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} + -+ ins_pipe(iload_reg_mem); -+%} ++//============================================================================= + -+// Load Integer (32 bit signed) into long -+instruct loadI2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadI mem))); ++// Figure out which register class each belongs in: rc_int, rc_float or ++// rc_stack. ++enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++static enum RC rc_class(OptoReg::Name reg) { + -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (reg == OptoReg::Bad) { ++ return rc_bad; ++ } + -+ ins_pipe(iload_reg_mem); -+%} ++ // we have 30 int registers * 2 halves ++ // (t0 and t1 are omitted) ++ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); ++ if (reg < slots_of_int_registers) { ++ return rc_int; ++ } + -+// Load Integer (32 bit unsigned) into long -+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) -+%{ -+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ // we have 32 float register * 2 halves ++ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers) { ++ return rc_float; ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ // we have 32 vector register * 4 halves ++ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { ++ return rc_vector; ++ } + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Between vector regs & stack is the flags regs. ++ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + -+ ins_pipe(iload_reg_mem); -+%} ++ return rc_stack; ++} + -+// Load Long (64 bit signed) -+instruct loadL(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadL mem)); ++uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# int, #@loadL" %} ++ // Get registers to move. 
++ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_hi = ra_->get_reg_second(this); ++ OptoReg::Name dst_lo = ra_->get_reg_first(this); + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ enum RC src_hi_rc = rc_class(src_hi); ++ enum RC src_lo_rc = rc_class(src_lo); ++ enum RC dst_hi_rc = rc_class(dst_hi); ++ enum RC dst_lo_rc = rc_class(dst_lo); + -+ ins_pipe(iload_reg_mem); -+%} ++ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + -+// Load Range -+instruct loadRange(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadRange mem)); ++ if (src_hi != OptoReg::Bad) { ++ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, ++ "expected aligned-adjacent pairs"); ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} ++ if (src_lo == dst_lo && src_hi == dst_hi) { ++ return 0; // Self copy, no move. ++ } + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; ++ int src_offset = ra_->reg2offset(src_lo); ++ int dst_offset = ra_->reg2offset(dst_lo); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ if (ireg == Op_VecA && cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { ++ // stack to stack ++ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, ++ vector_reg_size_in_bytes); ++ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { ++ // vpr to stack ++ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); ++ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { ++ // stack to vpr ++ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); ++ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { ++ // vpr to vpr ++ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } else if (cbuf != NULL) { ++ C2_MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ switch (src_lo_rc) { ++ case rc_int: ++ if (dst_lo_rc == rc_int) { // gpr --> gpr copy ++ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass ++ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); ++ } else { ++ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy ++ if (is64) { ++ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else { // gpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); ++ } ++ break; ++ case rc_float: ++ if (dst_lo_rc == rc_int) { // fpr --> gpr copy ++ if (is64) { ++ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), 
++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy ++ if (is64) { ++ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else { // fpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), ++ is64, dst_offset); ++ } ++ break; ++ case rc_stack: ++ if (dst_lo_rc == rc_int) { // stack --> gpr load ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } else { // // zero extended for narrow oop or klass ++ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } ++ } else if (dst_lo_rc == rc_float) { // stack --> fpr load ++ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ is64, src_offset); ++ } else { // stack --> stack copy ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(t0, is64, src_offset); ++ } else { // zero extended for narrow oop or klass ++ __ unspillu(t0, is64, src_offset); ++ } ++ __ spill(t0, is64, dst_offset); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// Load Pointer -+instruct loadP(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadP mem)); ++ if (st != NULL) { ++ st->print("spill "); ++ if (src_lo_rc == rc_stack) { ++ st->print("[sp, #%d] -> ", src_offset); ++ } else { ++ st->print("%s -> ", Matcher::regName[src_lo]); ++ } ++ if (dst_lo_rc == rc_stack) { ++ st->print("[sp, #%d]", dst_offset); ++ } else { ++ st->print("%s", Matcher::regName[dst_lo]); ++ } ++ if (bottom_type()->isa_vect() != NULL) { ++ int vsize = 0; ++ if (ideal_reg() == Op_VecA) { ++ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; ++ } else { ++ ShouldNotReachHere(); ++ } ++ st->print("\t# vector spill size = %d", vsize); ++ } else { ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); ++ } ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ return 0; ++} + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++#ifndef PRODUCT ++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ if (ra_ == NULL) { ++ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); ++ } else { ++ implementation(NULL, ra_, false, st); ++ } ++} ++#endif + -+ ins_pipe(iload_reg_mem); -+%} ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation(&cbuf, ra_, false, NULL); ++} + -+// Load Compressed Pointer -+instruct loadN(iRegNNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadN mem)); ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} ++//============================================================================= + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++#ifndef PRODUCT ++void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(ra_ != NULL && st != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("add %s, sp, #%d\t# box lock", ++ Matcher::regName[reg], offset); ++} ++#endif + -+ ins_pipe(iload_reg_mem); -+%} ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); + -+// Load Klass Pointer -+instruct loadKlass(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadKlass mem)); ++ assert_cond(ra_ != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ++ if (is_imm_in_range(offset, 12, 0)) { ++ __ addi(as_Register(reg), sp, offset); ++ } else if (is_imm_in_range(offset, 32, 0)) { ++ __ li32(t0, offset); ++ __ add(as_Register(reg), sp, t0); ++ } else { ++ ShouldNotReachHere(); ++ } ++} + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). 
++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (is_imm_in_range(offset, 12, 0)) { ++ return NativeInstruction::instruction_size; ++ } else { ++ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ } ++} + -+// Load Narrow Klass Pointer -+instruct loadNKlass(iRegNNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadNKlass mem)); ++//============================================================================= + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++#ifndef PRODUCT ++void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ assert_cond(st != NULL); ++ st->print_cr("# MachUEPNode"); ++ if (UseCompressedClassPointers) { ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ if (CompressedKlassPointers::shift() != 0) { ++ st->print_cr("\tdecode_klass_not_null t0, t0"); ++ } ++ } else { ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ } ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); ++} ++#endif + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const ++{ ++ // This is the unverified entry point. ++ C2_MacroAssembler _masm(&cbuf); + -+ ins_pipe(iload_reg_mem); -+%} ++ Label skip; ++ __ cmp_klass(j_rarg0, t1, t0, skip); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ bind(skip); ++} + -+// Load Float -+instruct loadF(fRegF dst, memory mem) -+%{ -+ match(Set dst (LoadF mem)); ++uint MachUEPNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); ++} + -+ ins_cost(LOAD_COST); -+ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++// REQUIRED EMIT CODE + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++//============================================================================= + -+ ins_pipe(fp_load_mem_s); -+%} ++// Emit exception handler code. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) ++{ ++ // la_patchable t0, #exception_blob_entry_point ++ // jr (offset)t0 ++ // or ++ // j #exception_blob_entry_point ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); ++ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} + -+// Load Double -+instruct loadD(fRegD dst, memory mem) -+%{ -+ match(Set dst (LoadD mem)); ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) ++{ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); + -+ ins_cost(LOAD_COST); -+ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ __ auipc(ra, 0); ++ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; + -+ ins_pipe(fp_load_mem_d); -+%} ++} ++// REQUIRED MATCHER CODE + -+// Load Int Constant -+instruct loadConI(iRegINoSp dst, immI src) -+%{ -+ match(Set dst src); ++//============================================================================= + -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# int, #@loadConI" %} ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) { ++ return false; ++ } + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ switch (opcode) { ++ case Op_CacheWB: // fall through ++ case Op_CacheWBPreSync: // fall through ++ case Op_CacheWBPostSync: ++ if (!VM_Version::supports_data_cache_line_flush()) { ++ return false; ++ } ++ break; + -+ ins_pipe(ialu_imm); -+%} ++ case Op_StrCompressedCopy: // fall through ++ case Op_StrInflatedCopy: // fall through ++ case Op_CountPositives: ++ return UseRVV; + -+// Load Long Constant -+instruct loadConL(iRegLNoSp dst, immL src) -+%{ -+ match(Set dst src); ++ case Op_EncodeISOArray: ++ return UseRVV && SpecialEncodeISOArray; + -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# long, #@loadConL" %} ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ case Op_RotateRight: ++ case Op_RotateLeft: ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ return UseRVB; ++ } + -+ ins_pipe(ialu_imm); -+%} ++ return true; // Per default match rules are supported. ++} + -+// Load Pointer Constant -+instruct loadConP(iRegPNoSp dst, immP con) -+%{ -+ match(Set dst con); ++// Identify extra cases that we might want to provide match rules for vector nodes and ++// other intrinsics guarded with vector length (vlen) and element type (bt). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} ++ return op_vec_supported(opcode); ++} + -+ ins_encode(riscv_enc_mov_p(dst, con)); ++const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++ return false; ++} + -+ ins_pipe(ialu_imm); -+%} ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; ++} + -+// Load Null Pointer Constant -+instruct loadConP0(iRegPNoSp dst, immP0 con) -+%{ -+ match(Set dst con); ++const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++// Vector calling convention not yet implemented. 
++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} + -+ ins_encode(riscv_enc_mov_zero(dst)); ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++ Unimplemented(); ++ return OptoRegPair(0, 0); ++} + -+ ins_pipe(ialu_imm); -+%} ++// Is this branch offset short enough that a short branch can be used? ++// ++// NOTE: If the platform does not provide any short branch variants, then ++// this method should return false for offset 0. ++// |---label(L1)-----| ++// |-----------------| ++// |-----------------|----------eq: float------------------- ++// |-----------------| // far_cmpD_branch | cmpD_branch ++// |------- ---------| feq; | feq; ++// |-far_cmpD_branch-| beqz done; | bnez L; ++// |-----------------| j L; | ++// |-----------------| bind(done); | ++// |-----------------|-------------------------------------- ++// |-----------------| // so shortBrSize = br_size - 4; ++// |-----------------| // so offs = offset - shortBrSize + 4; ++// |---label(L2)-----| ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ // The passed offset is relative to address of the branch. ++ int shortBrSize = br_size - 4; ++ int offs = offset - shortBrSize + 4; ++ return (-4096 <= offs && offs < 4096); ++} + -+// Load Pointer Constant One -+instruct loadConP1(iRegPNoSp dst, immP_1 con) -+%{ -+ match(Set dst con); ++// Vector width in bytes. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (UseRVV) { ++ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. ++ // MaxVectorSize == VM_Version::_initial_vector_length ++ return MaxVectorSize; ++ } ++ return 0; ++} + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ return vector_width_in_bytes(bt) / type2aelembytes(bt); ++} ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); ++} + -+ ins_encode(riscv_enc_mov_p1(dst)); ++// Vector ideal reg. ++const uint Matcher::vector_ideal_reg(int len) { ++ assert(MaxVectorSize >= len, ""); ++ if (UseRVV) { ++ return Op_VecA; ++ } + -+ ins_pipe(ialu_imm); -+%} ++ ShouldNotReachHere(); ++ return 0; ++} + -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return Matcher::max_vector_size(bt); ++} + -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} + -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++bool Matcher::is_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} + -+ ins_pipe(ialu_imm); -+%} ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} + -+// Load Byte Map Base Constant -+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) -+%{ -+ match(Set dst con); -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} ++// Return whether or not this register is ever used as an argument. 
++// This function is used on startup to build the trampoline stubs in ++// generateOptoStub. Registers not mentioned will be killed by the VM ++// call in the trampoline, and arguments in those registers not be ++// available to the callee. ++bool Matcher::can_be_java_arg(int reg) ++{ ++ return ++ reg == R10_num || reg == R10_H_num || ++ reg == R11_num || reg == R11_H_num || ++ reg == R12_num || reg == R12_H_num || ++ reg == R13_num || reg == R13_H_num || ++ reg == R14_num || reg == R14_H_num || ++ reg == R15_num || reg == R15_H_num || ++ reg == R16_num || reg == R16_H_num || ++ reg == R17_num || reg == R17_H_num || ++ reg == F10_num || reg == F10_H_num || ++ reg == F11_num || reg == F11_H_num || ++ reg == F12_num || reg == F12_H_num || ++ reg == F13_num || reg == F13_H_num || ++ reg == F14_num || reg == F14_H_num || ++ reg == F15_num || reg == F15_H_num || ++ reg == F16_num || reg == F16_H_num || ++ reg == F17_num || reg == F17_H_num; ++} + -+ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++bool Matcher::is_spillable_arg(int reg) ++{ ++ return can_be_java_arg(reg); ++} + -+ ins_pipe(ialu_imm); -+%} ++uint Matcher::int_pressure_limit() ++{ ++ // A derived pointer is live at CallNode and then is flagged by RA ++ // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip ++ // derived pointers and lastly fail to spill after reaching maximum ++ // number of iterations. Lowering the default pressure threshold to ++ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become ++ // a high register pressure area of the code so that split_DEF can ++ // generate DefinitionSpillCopy for the derived pointer. ++ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; ++ if (!PreserveFramePointer) { ++ // When PreserveFramePointer is off, frame pointer is allocatable, ++ // but different from other SOC registers, it is excluded from ++ // fatproj's mask because its save type is No-Save. Decrease 1 to ++ // ensure high pressure at fatproj when PreserveFramePointer is off. ++ // See check_pressure_at_fatproj(). ++ default_int_pressure_threshold--; ++ } ++ return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; ++} ++ ++uint Matcher::float_pressure_limit() ++{ ++ // _FLOAT_REG_mask is generated by adlc from the float_reg register class. ++ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; ++} + -+// Load Narrow Pointer Constant -+instruct loadConN(iRegNNoSp dst, immN con) -+%{ -+ match(Set dst con); ++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { ++ return false; ++} + -+ ins_cost(ALU_COST * 4); -+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+ ins_encode(riscv_enc_mov_n(dst, con)); ++// Register for MODI projection of divmodI. ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+ ins_pipe(ialu_imm); -+%} ++// Register for DIVL projection of divmodL. ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// Load Narrow Null Pointer Constant -+instruct loadConN0(iRegNNoSp dst, immN0 con) -+%{ -+ match(Set dst con); ++// Register for MODL projection of divmodL. 
++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} + -+ ins_encode(riscv_enc_mov_zero(dst)); ++bool size_fits_all_mem_uses(AddPNode* addp, int shift) { ++ assert_cond(addp != NULL); ++ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { ++ Node* u = addp->fast_out(i); ++ if (u != NULL && u->is_Mem()) { ++ int opsize = u->as_Mem()->memory_size(); ++ assert(opsize > 0, "unexpected memory operand size"); ++ if (u->as_Mem()->memory_size() != (1 << shift)) { ++ return false; ++ } ++ } ++ } ++ return true; ++} + -+ ins_pipe(ialu_imm); -+%} ++// Should the Matcher clone input 'm' of node 'n'? ++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { ++ assert_cond(m != NULL); ++ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) ++ mstack.push(m, Visit); // m = ShiftCntV ++ return true; ++ } ++ return false; ++} + -+// Load Narrow Klass Constant -+instruct loadConNKlass(iRegNNoSp dst, immNKlass con) -+%{ -+ match(Set dst con); ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} + -+ ins_cost(ALU_COST * 6); -+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++%} + -+ ins_encode(riscv_enc_mov_nk(dst, con)); + -+ ins_pipe(ialu_imm); -+%} + -+// Load Float Constant -+instruct loadConF(fRegF dst, immF con) %{ -+ match(Set dst con); ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes are parameterized macros ++// used by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. ++// ++// Instructions specify two basic values for encoding. Again, a ++// function is available to check if the constant displacement is an ++// oop. They use the ins_encode keyword to specify their encoding ++// classes (which must be a sequence of enc_class names, and their ++// parameters, specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular ++// instruction needs for encoding need to be specified. 
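For reference, the distance test in Matcher::is_short_branch_offset earlier in this hunk is easiest to follow with concrete numbers. The standalone C++ sketch below is not part of the patch; short_branch_reachable is an illustrative name that simply repeats the same arithmetic, under the assumption that a short conditional branch reaches roughly a signed +/-4 KiB window.

#include <iostream>

// Standalone sketch (not HotSpot code) of the arithmetic in
// Matcher::is_short_branch_offset: the passed offset is relative to the
// far-branch form, so it is first rebased onto the 4-byte short branch
// before the +/-4096 range test.
static bool short_branch_reachable(int br_size, int offset) {
  int shortBrSize = br_size - 4;        // far form minus the 4-byte short branch
  int offs = offset - shortBrSize + 4;  // offset as seen from the short branch
  return -4096 <= offs && offs < 4096;
}

int main() {
  std::cout << short_branch_reachable(12, 4000) << '\n';  // 1: short branch suffices
  std::cout << short_branch_reachable(12, 6000) << '\n';  // 0: needs the far variant
  return 0;
}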
++encode %{ ++ // BEGIN Non-volatile memory access + -+ ins_cost(LOAD_COST); -+ format %{ -+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ int64_t con = (int64_t)$src$$constant; ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, con); + %} + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL || con == (address)1) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ if (rtype == relocInfo::oop_type) { ++ __ movoop(dst_reg, (jobject)con, /*immediate*/true); ++ } else if (rtype == relocInfo::metadata_type) { ++ __ mov_metadata(dst_reg, (Metadata*)con); ++ } else { ++ assert(rtype == relocInfo::none, "unexpected reloc type"); ++ __ li(dst_reg, $src$$constant); ++ } ++ } + %} + -+ ins_pipe(fp_load_constant_s); -+%} ++ enc_class riscv_enc_mov_p1(iRegP dst) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, 1); ++ %} + -+instruct loadConF0(fRegF dst, immF0 con) %{ -+ match(Set dst con); ++ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ load_byte_map_base($dst$$Register); ++ %} + -+ ins_cost(XFER_COST); ++ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); ++ __ set_narrow_oop(dst_reg, (jobject)con); ++ } ++ %} + -+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, zr); ++ %} + -+ ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); ++ __ set_narrow_klass(dst_reg, (Klass *)con); ++ } + %} + -+ ins_pipe(fp_load_constant_s); -+%} ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// Load Double Constant -+instruct loadConD(fRegD dst, immD con) %{ -+ match(Set dst con); ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ 
%} + -+ ins_cost(LOAD_COST); -+ format %{ -+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); + %} + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); + %} + -+ ins_pipe(fp_load_constant_d); -+%} ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+instruct loadConD0(fRegD dst, immD0 con) %{ -+ match(Set dst con); ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+ ins_cost(XFER_COST); ++ // compare and branch instruction encodings + -+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++ enc_class riscv_enc_j(label lbl) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ __ j(*L); ++ %} + -+ ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ switch ($cmp$$cmpcode) { ++ case(BoolTest::ge): ++ __ j(*L); ++ break; ++ case(BoolTest::lt): ++ break; ++ default: ++ Unimplemented(); ++ } + %} + -+ ins_pipe(fp_load_constant_d); -+%} ++ // call instruction encodings + -+// Store Instructions -+// Store CMS card-mark Immediate -+instruct storeimmCM0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); ++ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ ++ Register sub_reg = as_Register($sub$$reg); ++ Register super_reg = as_Register($super$$reg); ++ Register temp_reg = as_Register($temp$$reg); ++ Register result_reg = as_Register($result$$reg); ++ Register cr_reg = t1; + -+ ins_cost(STORE_COST); -+ format %{ "storestore (elided)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0" %} ++ Label miss; ++ Label done; ++ C2_MacroAssembler _masm(&cbuf); ++ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, ++ NULL, &miss); ++ if ($primary) { ++ __ mv(result_reg, zr); ++ } else { ++ __ mv(cr_reg, zr); ++ __ j(done); ++ } + -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ __ bind(miss); ++ if (!$primary) { ++ __ li(cr_reg, 1); ++ } + -+ ins_pipe(istore_mem); -+%} ++ __ bind(done); ++ %} + -+// Store CMS card-mark Immediate with intervening StoreStore -+// needed when using CMS 
with no conditional card marking -+instruct storeimmCM0_ordered(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); ++ enc_class riscv_enc_java_static_call(method meth) %{ ++ C2_MacroAssembler _masm(&cbuf); + -+ ins_cost(ALU_COST + STORE_COST); -+ format %{ "membar(StoreStore)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} ++ address addr = (address)$meth$$method; ++ address call = NULL; ++ assert_cond(addr != NULL); ++ if (!_method) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } + %} + -+ ins_pipe(istore_mem); -+%} -+ -+// Store Byte -+instruct storeB(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreB mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb $src, $mem\t# byte, #@storeB" %} ++ enc_class riscv_enc_java_dynamic_call(method meth) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ int method_index = resolved_method_index(cbuf); ++ address call = __ ic_call((address)$meth$$method, method_index); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} + -+ ins_encode %{ -+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_call_epilog() %{ ++ C2_MacroAssembler _masm(&cbuf); ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ __ call_Unimplemented(); ++ } + %} + -+ ins_pipe(istore_reg_mem); -+%} ++ enc_class riscv_enc_java_to_runtime(method meth) %{ ++ C2_MacroAssembler _masm(&cbuf); + -+instruct storeimmB0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreB mem zero)); ++ // some calls to generated routines (arraycopy code) are scheduled ++ // by C2 as runtime calls. if so we can call them using a jr (they ++ // will be in a reachable segment) otherwise we have to use a jalr ++ // which loads the absolute address into a register. ++ address entry = (address)$meth$$method; ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb != NULL) { ++ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ Label retaddr; ++ __ la(t1, retaddr); ++ __ la(t0, RuntimeAddress(entry)); ++ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(t1, Address(sp, wordSize)); ++ __ jalr(t0); ++ __ bind(retaddr); ++ __ addi(sp, sp, 2 * wordSize); ++ } ++ %} + -+ ins_cost(STORE_COST); -+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} ++ // using the cr register as the bool result: 0 for success; others failed. 
++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+ ins_pipe(istore_mem); -+%} ++ // Load markWord from object into displaced_header. ++ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + -+// Store Char/Short -+instruct storeC(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreC mem src)); ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ __ load_klass(flag, oop); ++ __ lwu(flag, Address(flag, Klass::access_flags_offset())); ++ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); ++ __ bnez(flag, cont, true /* is_far */); ++ } + -+ ins_cost(STORE_COST); -+ format %{ "sh $src, $mem\t# short, #@storeC" %} ++ // Check for existing monitor ++ __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ bnez(t0, object_has_monitor); + -+ ins_encode %{ -+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (!UseHeavyMonitors) { ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markWord::unlocked_value); + -+ ins_pipe(istore_reg_mem); -+%} ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+instruct storeimmC0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreC mem zero)); ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here ++ } else { ++ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path ++ } + -+ ins_cost(STORE_COST); -+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} ++ __ j(cont); + -+ ins_encode %{ -+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Handle existing monitor. 
++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markWord::monitor_value so use markWord::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markWord::unused_mark().value()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ ins_pipe(istore_mem); -+%} ++ __ beqz(flag, cont); // CAS success means locking succeeded + -+// Store Integer -+instruct storeI(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem(StoreI mem src)); ++ __ bne(flag, xthread, cont); // Check for recursive locking + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# int, #@storeI" %} ++ // Recursive lock case ++ __ mv(flag, zr); ++ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ add(tmp, tmp, 1u); ++ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ bind(cont); + %} + -+ ins_pipe(istore_reg_mem); -+%} -+ -+instruct storeimmI0(immI0 zero, memory mem) -+%{ -+ match(Set mem(StoreI mem zero)); ++ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+ ins_cost(STORE_COST); -+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+ ins_encode %{ -+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (!UseHeavyMonitors) { ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ ins_pipe(istore_mem); -+%} ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); ++ } + -+// Store Long (64 bit signed) -+instruct storeL(iRegL src, memory mem) -+%{ -+ match(Set mem (StoreL mem src)); ++ // Handle existing monitor. ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ ++ if (!UseHeavyMonitors) { ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. 
++ ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ } else { ++ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path ++ } ++ __ j(cont); + -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# long, #@storeL" %} ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markWord::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + -+ ins_pipe(istore_reg_mem); -+%} ++ Label notRecursive; ++ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. + -+// Store Long (64 bit signed) -+instruct storeimmL0(immL0 zero, memory mem) -+%{ -+ match(Set mem (StoreL mem zero)); ++ // Recursive lock ++ __ addi(disp_hdr, disp_hdr, -1); ++ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ mv(flag, zr); ++ __ j(cont); + -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} ++ __ bind(notRecursive); ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. ++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ __ bind(cont); + %} + -+ ins_pipe(istore_mem); -+%} -+ -+// Store Pointer -+instruct storeP(iRegP src, memory mem) -+%{ -+ match(Set mem (StoreP mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# ptr, #@storeP" %} ++ // arithmetic encodings + -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); + %} + -+ ins_pipe(istore_reg_mem); -+%} -+ -+// Store Pointer -+instruct storeimmP0(immP0 zero, memory mem) -+%{ -+ match(Set mem (StoreP mem zero)); ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} + -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} ++ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ 
C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); + %} + -+ ins_pipe(istore_mem); -+%} ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); ++ %} + -+// Store Compressed Pointer -+instruct storeN(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreN mem src)); ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ++ enc_class riscv_enc_rethrow() %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_ret() %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ret(); + %} + -+ ins_pipe(istore_reg_mem); +%} + -+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) -+%{ -+ match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. ++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. 
Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// (the latter is true on Intel but is it false on RISCV?) ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. + -+ ins_encode %{ -+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++frame %{ ++ // These three registers define part of the calling convention ++ // between compiled code and the interpreter. + -+ ins_pipe(istore_reg_mem); -+%} ++ // Inline Cache Register or methodOop for I2C. ++ inline_cache_reg(R31); + -+// Store Float -+instruct storeF(fRegF src, memory mem) -+%{ -+ match(Set mem (StoreF mem src)); ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset); + -+ ins_cost(STORE_COST); -+ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 ++ sync_stack_slots(1 * VMRegImpl::slots_per_word); + -+ ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Compiled code's Frame Pointer ++ frame_pointer(R2); + -+ ins_pipe(fp_store_reg_s); -+%} ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ interpreter_frame_pointer(R8); + -+// Store Double -+instruct storeD(fRegD src, memory mem) -+%{ -+ match(Set mem (StoreD mem src)); ++ // Stack alignment requirement ++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + -+ ins_cost(STORE_COST); -+ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + -+ ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); + -+ ins_pipe(fp_store_reg_d); -+%} ++ // Location of compiled Java return values. Same as C for now. ++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); + -+// Store Compressed Klass Pointer -+instruct storeNKlass(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreNKlass mem src)); ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} -+ -+ ins_pipe(istore_reg_mem); +%} + -+// ============================================================================ -+// Atomic operation instructions -+// -+// Intel and SPARC both implement Ideal Node LoadPLocked and -+// Store{PIL}Conditional instructions using a normal load for the -+// LoadPLocked and a CAS for the Store{PIL}Conditional. -+// -+// The ideal code appears only to use LoadPLocked/storePConditional as a -+// pair to lock object allocations from Eden space when not using -+// TLABs. -+// -+// There does not appear to be a Load{IL}Locked Ideal Node and the -+// Ideal code appears to use Store{IL}Conditional as an alias for CAS -+// and to use StoreIConditional only for 32-bit and StoreLConditional -+// only for 64-bit. -+// -+// We implement LoadPLocked and storePConditional instructions using, -+// respectively the RISCV hw load-reserve and store-conditional -+// instructions. Whereas we must implement each of -+// Store{IL}Conditional using a CAS which employs a pair of -+// instructions comprising a load-reserve followed by a -+// store-conditional. -+ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute + -+// Locked-load (load reserved) of the current heap-top -+// used when updating the eden heap top -+// implemented using lr_d on RISCV64 -+instruct loadPLocked(iRegPNoSp dst, indirect mem) -+%{ -+ match(Set dst (LoadPLocked mem)); ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? 
++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction + -+ ins_cost(ALU_COST * 2 + LOAD_COST); ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. + -+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} ++//----------Simple Operands---------------------------------------------------- + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); -+ __ lr_d($dst$$Register, t0, Assembler::aq); -+ %} ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() ++%{ ++ match(ConI); + -+ ins_pipe(pipe_serial); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Conditional-store of the updated heap-top. -+// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV. -+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++// 32 bit zero ++operand immI0() +%{ -+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); -+ -+ ins_cost(ALU_COST * 2 + STORE_COST); ++ predicate(n->get_int() == 0); ++ match(ConI); + -+ format %{ -+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); -+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); -+ %} ++// 32 bit unit increment ++operand immI_1() ++%{ ++ predicate(n->get_int() == 1); ++ match(ConI); + -+ ins_pipe(pipe_serial); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++// 32 bit unit decrement ++operand immI_M1() +%{ -+ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ predicate(n->get_int() == -1); ++ match(ConI); + -+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" -+ %} ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++operand immI_le_4() ++%{ ++ predicate(n->get_int() <= 4); ++ match(ConI); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// storeIConditional also has acquire semantics, for no better reason -+// than matching storeLConditional. 
-+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++operand immI_16() +%{ -+ match(Set cr (StoreIConditional mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" -+ %} ++operand immI_24() ++%{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++operand immI_31() ++%{ ++ predicate(n->get_int() == 31); ++ match(ConI); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// standard CompareAndSwapX when we are using barriers -+// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++operand immI_63() +%{ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ predicate(n->get_int() == 63); ++ match(ConI); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// 32 bit integer valid for add immediate ++operand immIAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" -+ %} ++// 32 bit integer valid for sub immediate ++operand immISub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// 5 bit signed value. 
++operand immI5() ++%{ ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// 5 bit signed value (simm5) ++operand immL5() +%{ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() ++%{ ++ match(ConL); + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// 64 bit zero ++operand immL0() ++%{ ++ predicate(n->get_long() == 0); ++ match(ConL); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Pointer operands ++// Pointer Immediate ++operand immP() +%{ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ match(ConP); + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++// NULL Pointer Immediate ++operand immP0() ++%{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() +%{ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ predicate(n->get_ptr() == 1); ++ match(ConP); + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapL" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++// Card Table Byte Map Base ++operand immByteMapBase() ++%{ ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (CardTable::CardValue*)n->get_ptr() == ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Int Immediate: low 16-bit mask ++operand immI_16bits() +%{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ predicate(n->get_int() == 0xFFFF); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" -+ %} ++// Long Immediate: low 32-bit mask ++operand immL_32bits() ++%{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++// 64 bit unit decrement ++operand immL_M1() ++%{ ++ predicate(n->get_long() == -1); ++ match(ConL); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) -+%{ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++// 32 bit offset of pc in thread anchor + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" -+ %} -+ -+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++operand immL_pc_off() ++%{ ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// alternative CompareAndSwapX when we are eliding barriers -+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// 64 bit integer valid for add immediate ++operand immLAdd() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// 64 bit integer valid for sub immediate ++operand immLSub() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" -+ %} -+ -+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ match(ConN); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Narrow NULL Pointer Immediate ++operand immN0() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapLAcq" -+ %} ++operand immNKlass() ++%{ ++ match(ConNKlass); + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Float and Double operands ++// Double Immediate ++operand immD() ++%{ ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Double Immediate: +0.0d ++operand immD0() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); + -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++// Float Immediate ++operand immF() ++%{ ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" -+ %} ++// Float Immediate: +0.0f. ++operand immF0() ++%{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_pipe(pipe_slow); ++operand immIOffset() ++%{ ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++operand immLOffset() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++// Scale values ++operand immIScale() ++%{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" -+ %} ++// Integer 32 bit Register Operands ++operand iRegI() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++// Integer 32 bit Register not Special ++operand iRegINoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Register R10 only ++operand iRegI_R10() ++%{ ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Sundry CAS operations. Note that release is always true, -+// regardless of the memory ordering of the CAS. This is because we -+// need the volatile case to be sequentially consistent but there is -+// no trailing StoreLoad barrier emitted by C2. Unfortunately we -+// can't check the type of memory ordering here, so we always emit a -+// sc_d(w) with rl bit set. 
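-+//
-+// A rough sketch of the retry loop such a strong cmpxchg conceptually expands
-+// to (illustrative only; the actual emission is done by the cmpxchg helpers
-+// called from the encodings above):
-+//
-+//   retry:
-+//     lr.d     t0, (addr)           // load-reserved; aq bit set for the *Acq forms
-+//     bne      t0, oldval, done     // current value differs, fail
-+//     sc.d.rl  t1, newval, (addr)   // store-conditional always carries the rl bit
-+//     bnez     t1, retry            // reservation lost, retry
-+//   done:
-+//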
-+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Register R12 only ++operand iRegI_R12() +%{ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++// Register R13 only ++operand iRegI_R13() ++%{ ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Register R14 only ++operand iRegI_R14() ++%{ ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" -+ %} ++// Integer 64 bit Register Operands ++operand iRegL() ++%{ ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Integer 64 bit Register not Special ++operand iRegLNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Long 64 bit Register R28 only ++operand iRegL_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Long 64 bit Register R29 only ++operand iRegL_R29() +%{ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++// Long 64 bit Register R30 only ++operand iRegL_R30() ++%{ ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" -+ %} ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, 
/*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() ++%{ ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++operand iRegP_R12() +%{ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() ++%{ ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++operand iRegP_R14() ++%{ ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" -+ %} ++operand iRegP_R15() ++%{ ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++operand iRegP_R16() ++%{ ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() +%{ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++// Integer 64 bit Register not Special ++operand iRegNNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() ++%{ ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" -+ %} ++// Long 64 bit Register R10 only ++operand iRegL_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), 
$oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++// Float Register ++// Float register operands ++operand fRegF() ++%{ ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Double Register ++// Double register operands ++operand fRegD() +%{ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++// Generic vector class. This will be used for ++// all vector operands. ++operand vReg() ++%{ ++ constraint(ALLOC_IN_RC(vectora_reg)); ++ match(VecA); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" -+ %} ++operand vReg_V1() ++%{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++operand vReg_V2() ++%{ ++ constraint(ALLOC_IN_RC(v2_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++operand vReg_V3() ++%{ ++ constraint(ALLOC_IN_RC(v3_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++operand vReg_V4() +%{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(v4_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++operand vReg_V5() ++%{ ++ constraint(ALLOC_IN_RC(v5_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++// Java Thread Register ++operand javaThread_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. 
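++// For example, an indexed element access cannot be folded into a single
++// memory operand here; a rough sketch of what gets emitted instead:
++//   slli  t0, i, 3        // scale the index by the element size
++//   add   t0, base, t0    // base + scaled index computed explicitly
++//   ld    x10, 0(t0)      // the memory operand itself is just base + disp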
++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++operand indOffI(iRegP reg, immIOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++operand indOffL(iRegP reg, immLOffset off) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++operand indirectN(iRegN reg) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++operand indOffIN(iRegN reg, immIOffset off) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++operand indOffLN(iRegN reg, immLOffset off) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ 
op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} + -+ ins_pipe(pipe_slow); ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++operand stackSlotF(sRegF reg) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++operand stackSlotD(sRegD reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++operand stackSlotL(sRegL reg) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); 
// RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++// Special operand allowing long args to int ops to be truncated for free + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++operand iRegL2I(iRegL reg) %{ + -+ effect(TEMP_DEF res); ++ op_cost(0); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" -+ %} ++ match(ConvL2I reg); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ format %{ "l2i($reg)" %} + -+ ins_pipe(pipe_slow); ++ interface(REG_INTER) +%} + -+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); + -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. 
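++// For example, a test such as "if (x < y)" reaches the matcher as
++// (If (Bool (CmpI x y) lt)): the CmpI sets the flags operand and the Bool is
++// matched by one of the cmpOp operands below, whose encoding ("lt" vs "ltu")
++// lets the branch rule choose between blt and bltu (a rough sketch, not tied
++// to one specific rule).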
+ -+ effect(TEMP_DEF res); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" -+ %} ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() ++%{ ++ match(Bool); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ format %{ "" %} + -+ ins_pipe(pipe_slow); ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} +%} + -+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// used for unsigned integral comparisons ++operand cmpOpU() +%{ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapB" -+ %} ++ match(Bool); + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() +%{ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapS" -+ %} ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapI(iRegINoSp res, indirect 
mem, iRegI oldval, iRegI newval) ++operand cmpOpULtGe() +%{ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapI" -+ %} ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++operand cmpOpUEqNeLeGt() +%{ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapL" -+ %} ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) -+%{ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapN" -+ %} ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++// Special Registers + -+ ins_pipe(pipe_slow); ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of 
operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++// memory is used to define read/write location for load/store ++// instruction defs. we can turn a memory op into an Address + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapP" -+ %} ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. + -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// For specific pipelines, e.g. 
generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapBAcq" -+ %} ++// Integer ALU reg operation ++pipeline %{ + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes + -+ ins_pipe(pipe_slow); ++ // List of nop instructions ++ nops( MachNop ); +%} + -+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++// We don't use an actual pipeline model so don't care about resources ++// or description. we do use pipeline classes to introduce fixed ++// latencies + -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapSAcq" -+ %} ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); + -+ ins_pipe(pipe_slow); -+%} ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. 
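++// For example, an instruct below that says "ins_pipe(ialu_reg_reg)" declares
++// that it reads its sources at ID and writes its result at EX, while a load
++// matched to iload_reg_mem only produces its result at WR; the scheduler uses
++// these read/write stages to derive the fixed latencies between dependent
++// instructions.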
+ -+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapIAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapLAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapNAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapPAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++pipe_class fp_d2f(fRegF dst, fRegD src) +%{ -+ match(Set 
prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} -+ -+ ins_encode %{ -+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++pipe_class fp_f2d(fRegD dst, fRegF src) +%{ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} -+ -+ ins_encode %{ -+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++pipe_class fp_l2f(fRegF dst, iRegL src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++pipe_class fp_d2l(iRegLNoSp 
dst, fRegD src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ 
ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) ++pipe_class fp_load_constant_s(fRegF dst) +%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++pipe_class fp_load_constant_d(fRegD dst) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} ++pipe_class fp_load_mem_s(fRegF dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(pipe_serial); ++pipe_class fp_load_mem_d(fRegD dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ match(Set newval (GetAndAddL mem incr)); ++pipe_class fp_store_reg_d(fRegD src, memory mem) ++%{ ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(ALU_COST); ++//------- Integer ALU operations -------------------------- + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_pipe(pipe_serial); ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ match(Set dummy (GetAndAddL mem incr)); ++// Integer ALU reg immediate operation ++// E.g. 
ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_cost(ALU_COST); ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) ++%{ ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} ++//------- Multiply pipeline operations -------------------- + -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} ++// Multiply reg-reg ++// E.g. MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; ++%} + -+ ins_pipe(pipe_serial); ++// E.g. MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++//------- Divide pipeline operations -------------------- ++ ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} + -+ match(Set newval (GetAndAddL mem incr)); ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} + -+ ins_cost(ALU_COST); ++//------- Load pipeline operations ------------------------ + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) ++%{ ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(pipe_serial); ++//------- Store pipeline operations ----------------------- ++ ++// Store - zr, mem ++// E.g. SD zr, mem ++pipe_class istore_mem(memory mem) ++%{ ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ match(Set dummy (GetAndAddL mem incr)); ++// Store - reg, reg ++// E.g. 
SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(ALU_COST); ++//------- Store pipeline operations ----------------------- + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} ++// Branch ++pipe_class pipe_branch() ++%{ ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} ++// Branch ++pipe_class pipe_branch_reg(iRegI src) ++%{ ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_pipe(pipe_serial); ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ match(Set newval (GetAndAddI mem incr)); ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() ++%{ ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(ALU_COST); ++pipe_class pipe_slow() ++%{ ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} ++// Empty pipeline class ++pipe_class pipe_class_empty() ++%{ ++ single_instruction; ++ fixed_latency(0); ++%} + -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} ++// Default pipeline class. ++pipe_class pipe_class_default() ++%{ ++ single_instruction; ++ fixed_latency(2); ++%} + -+ ins_pipe(pipe_serial); ++// Pipeline class for compares. ++pipe_class pipe_class_compare() ++%{ ++ single_instruction; ++ fixed_latency(16); +%} + -+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ fixed_latency(16); ++%} + -+ match(Set dummy (GetAndAddI mem incr)); ++// Pipeline class for call. ++pipe_class pipe_class_call() ++%{ ++ single_instruction; ++ fixed_latency(100); ++%} + -+ ins_cost(ALU_COST); ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; ++%} ++%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. 
These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} ++// ============================================================================ ++// Memory (Load/Store) Instructions ++ ++// Load Instructions ++ ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} + + ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(iload_reg_mem); +%} + -+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set dst (ConvI2L (LoadB mem))); + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + + ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(iload_reg_mem); +%} + -+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set dst (LoadUB mem)); + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + + ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(iload_reg_mem); +%} + -+// ============================================================================ -+// Arithmetic Instructions -+// -+ -+// Integer Addition -+ -+// TODO -+// these currently employ operations which do not set CR and hence are -+// not flagged as killing CR but we would like to isolate the cases -+// where we want to set flags from those where we don't. need to work -+// out how to do that. 
-+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (AddI src1 src2)); ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUB mem))); + -+ ins_cost(ALU_COST); -+ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ -+ match(Set dst (AddI src1 src2)); ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadS mem)); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} + + ins_encode %{ -+ int32_t con = (int32_t)$src2$$constant; -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ -+ match(Set dst (AddI (ConvL2I src1) src2)); ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadS mem))); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} + + ins_encode %{ -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Pointer Addition -+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ -+ match(Set dst (AddP src1 src2)); ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadUS mem)); + -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} + + ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// If we shift more than 32 bits, we need not convert I2L. -+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ -+ match(Set dst (LShiftL (ConvI2L src) scale)); -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} + + ins_encode %{ -+ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(iload_reg_mem); +%} + -+// Pointer Immediate Addition -+// n.b. 
this needs to be more expensive than using an indirect memory -+// operand -+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ -+ match(Set dst (AddP src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Long Addition -+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} + + ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// No constant pool entries requiredLong Immediate Addition. -+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Integer Subtraction -+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (SubI src1 src2)); ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadL mem)); + -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} + + ins_encode %{ -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// Immediate Subtraction -+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ -+ match(Set dst (SubI src1 src2)); ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadRange mem)); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addiw -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ 
$src2$$constant); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Long Subtraction -+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(n->as_Load()->barrier_data() == 0); ++ ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + + ins_encode %{ -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// No constant pool entries requiredLong Immediate Subtraction. -+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++// Load Compressed Pointer ++instruct loadN(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Integer Negation (special case for sub) ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadKlass mem)); + -+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ -+ match(Set dst (SubI zero src)); -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + + ins_encode %{ -+ // actually call the subw -+ __ negw(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// Long Negation ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); + -+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ -+ match(Set dst (SubL zero src)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} + + ins_encode %{ -+ // actually call the sub -+ __ neg(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// Integer Multiply ++// Load Float ++instruct loadF(fRegF dst, memory mem) ++%{ ++ match(Set dst (LoadF mem)); + -+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (MulI src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} + -+ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ // riscv64 mulw will sign-extension to high 32 bits in dst reg 
-+ __ mulw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(imul_reg_reg); ++ ins_pipe(fp_load_mem_s); +%} + -+// Long Multiply ++// Load Double ++instruct loadD(fRegD dst, memory mem) ++%{ ++ match(Set dst (LoadD mem)); + -+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (MulL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mul $dst, $src1, $src2\t#@mulL" %} ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} + + ins_encode %{ -+ __ mul(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(lmul_reg_reg); ++ ins_pipe(fp_load_mem_d); +%} + -+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) +%{ -+ match(Set dst (MulHiL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} ++ match(Set dst src); + -+ ins_encode %{ -+ __ mulh(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} + -+ ins_pipe(lmul_reg_reg); ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Integer Divide ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) ++%{ ++ match(Set dst src); + -+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (DivI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# long, #@loadConL" %} + -+ ins_encode(riscv_enc_divw(dst, src1, src2)); -+ ins_pipe(idiv_reg_reg); ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ -+ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) ++%{ ++ match(Set dst con); ++ + ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + -+ ins_encode %{ -+ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); -+ %} -+ ins_pipe(ialu_reg_shift); ++ ins_encode(riscv_enc_mov_p(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Long Divide ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); + -+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (DivL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "div $dst, $src1, $src2\t#@divL" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} + -+ ins_encode(riscv_enc_div(dst, src1, src2)); -+ ins_pipe(ldiv_reg_reg); ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ -+ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) ++%{ ++ match(Set dst con); ++ + ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + -+ 
ins_encode %{ -+ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); -+ %} -+ ins_pipe(ialu_reg_shift); ++ ins_encode(riscv_enc_mov_p1(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Integer Remainder ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++%{ ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + -+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (ModI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "remw $dst, $src1, $src2\t#@modI" %} ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); + -+ ins_encode(riscv_enc_modw(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_imm); +%} + -+// Long Remainder ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) ++%{ ++ match(Set dst con); + -+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (ModL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + -+ ins_encode(riscv_enc_mod(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_mov_n(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Integer Shifts ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) ++%{ ++ match(Set dst con); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftI src1 src2)); + ins_cost(ALU_COST); -+ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} + -+ ins_encode %{ -+ __ sllw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ ins_encode(riscv_enc_mov_zero(dst)); + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(ialu_imm); +%} + -+// Shift Left Immediate -+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (LShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ++%{ ++ match(Set dst con); + -+ ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 5 bits of the I-immediate field for RV32I -+ __ slliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); -+ %} ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} + -+ ins_pipe(ialu_reg_shift); ++ ins_encode(riscv_enc_mov_nk(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); + -+ ins_encode %{ -+ __ srlw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ ins_cost(LOAD_COST); ++ format %{ ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" + %} + -+ ins_pipe(ialu_reg_reg_vshift); -+%} -+ -+// Shift Right Logical Immediate -+instruct 
urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} -+ + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(fp_load_constant_s); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + + ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(fp_load_constant_s); +%} + -+// Shift Right Arithmetic Immediate -+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); ++ ++ ins_cost(LOAD_COST); ++ format %{ ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ %} + + ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(fp_load_constant_d); +%} + -+// Long Shifts ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++ ins_cost(XFER_COST); + -+ ins_cost(ALU_COST); -+ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + + ins_encode %{ -+ __ sll(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(fp_load_constant_d); +%} + -+// Shift Left Immediate -+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreCM mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ slli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} 
+ -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_mem); +%} + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreCM mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + + ins_encode %{ -+ __ srl(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(istore_mem); +%} + -+// Shift Right Logical Immediate -+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem (StoreB mem src)); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_reg_mem); +%} + -+// A special-case pattern for card table stores. 
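The "special-case pattern for card table stores" noted above matches the address arithmetic of a card-table post-barrier: the pointer is reinterpreted as an integer (CastP2X) and shifted right to index the card array, which is what the rule that follows recognizes. A minimal sketch of that barrier shape is given here for reference only; it is not code from this patch, and the 512-byte card size (shift of 9), the names, and the dirty value are assumptions based on HotSpot's defaults.

    // Illustrative only: the barrier shape behind (URShiftL (CastP2X addr) shift).
    #include <cstdint>

    static const unsigned kCardShift = 9;        // assumed 512-byte cards
    static volatile uint8_t* card_table_base;    // assumed to be set up by the GC

    inline void post_write_barrier(void* field_addr) {
      // CastP2X reinterprets the pointer; the right shift selects the card entry.
      card_table_base[reinterpret_cast<uintptr_t>(field_addr) >> kCardShift] = 0; // 0 assumed dirty
    }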
-+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ -+ match(Set dst (URShiftL (CastP2X src1) src2)); ++instruct storeimmB0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreB mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_mem); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem (StoreC mem src)); + -+ ins_cost(ALU_COST); -+ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} + + ins_encode %{ -+ __ sra(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(istore_reg_mem); +%} + -+// Shift Right Arithmetic Immediate -+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++instruct storeimmC0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreC mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srai(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_mem); +%} + -+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ -+ match(Set dst (XorI src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem(StoreI mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(istore_reg_mem); +%} + -+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ -+ match(Set dst (XorL src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} ++instruct storeimmI0(immI0 zero, memory mem) ++%{ ++ match(Set mem(StoreI mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(istore_mem); +%} + ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) ++%{ ++ match(Set mem (StoreL mem src)); + -+// 
============================================================================ -+// Floating Point Arithmetic Instructions -+ -+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (AddF src1 src2)); -+ -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ -+ __ fadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(istore_reg_mem); +%} + -+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (AddD src1 src2)); ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) ++%{ ++ match(Set mem (StoreL mem zero)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ -+ __ fadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(istore_mem); +%} + -+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (SubF src1 src2)); ++// Store Pointer ++instruct storeP(iRegP src, memory mem) ++%{ ++ match(Set mem (StoreP mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ -+ __ fsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(istore_reg_mem); +%} + -+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (SubD src1 src2)); ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) ++%{ ++ match(Set mem (StoreP mem zero)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ -+ __ fsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(istore_mem); +%} + -+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MulF src1 src2)); ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) ++%{ ++ match(Set mem (StoreN mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ -+ __ fmul_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(istore_reg_mem); +%} + -+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MulD src1 src2)); ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, 
memory mem) ++%{ ++ match(Set mem (StoreN mem zero)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ -+ __ fmul_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(istore_reg_mem); +%} + -+// src1 * src2 + src3 -+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary src1 src2))); ++// Store Float ++instruct storeF(fRegF src, memory mem) ++%{ ++ match(Set mem (StoreF mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ -+ __ fmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(fp_store_reg_s); +%} + -+// src1 * src2 + src3 -+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary src1 src2))); ++// Store Double ++instruct storeD(fRegD src, memory mem) ++%{ ++ match(Set mem (StoreD mem src)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ -+ __ fmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(fp_store_reg_d); +%} + -+// src1 * src2 - src3 -+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) ++%{ ++ match(Set mem (StoreNKlass mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ -+ __ fmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(istore_reg_mem); +%} + -+// src1 * src2 - src3 -+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); -+ -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a 
CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. + -+ ins_encode %{ -+ __ fmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); -+ %} + -+ ins_pipe(pipe_class_default); -+%} ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) ++%{ ++ match(Set dst (LoadPLocked mem)); + -+// -src1 * src2 + src3 -+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); -+ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ins_cost(ALU_COST * 2 + LOAD_COST); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ -+ __ fnmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 + src3 -+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); -+ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV64. 
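The comment block above describes building Store{IL}Conditional from a load-reserve/store-conditional pair rather than a single instruction. As a minimal sketch of that contract (not code from this patch; the function name and types are assumptions), the same semantics can be written with a GCC/Clang builtin, which riscv64 toolchains without the Zacas extension lower to an lr.d/bne/sc.d retry loop much like the cmpxchg encodings that follow:

    // Illustrative only: the compare-and-swap contract behind the cmpxchg
    // encodings in this file (expands to an lr.d / bne / sc.d retry loop on riscv64).
    #include <cstdint>

    inline bool cas_int64(volatile int64_t* addr, int64_t expected, int64_t desired) {
      return __atomic_compare_exchange_n(addr, &expected, desired,
                                         /*weak=*/false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }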
++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++%{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} ++ ins_cost(ALU_COST * 2 + STORE_COST); ++ ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} + + ins_encode %{ -+ __ fnmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 - src3 -+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); -+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} + + ins_encode %{ -+ __ fnmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_slow); +%} + -+// -src1 * src2 - src3 -+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); -+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. 
++instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} + + ins_encode %{ -+ __ fnmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_slow); +%} + -+// Math.max(FF)F -+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MaxF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxF_reg_reg\n\t" -+ "fmax.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" ++ %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+// Math.min(FF)F -+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MinF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minF_reg_reg\n\t" -+ "fmin.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+// Math.max(DD)D -+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MaxD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++%{ ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxD_reg_reg\n\t" -+ "fmax.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Math.min(DD)D -+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MinD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++%{ ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minD_reg_reg\n\t" -+ "fmin.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (DivF src1 src2)); ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ fdiv_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" + %} + -+ ins_pipe(fp_div_s); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (DivD src1 src2)); ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++%{ ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ fdiv_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapN" + %} + -+ ins_pipe(fp_div_d); ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct negF_reg_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (NegF src)); ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ fneg_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_uop_s); ++ ins_pipe(pipe_slow); +%} + -+instruct negD_reg_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (NegD src)); ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ fneg_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_uop_d); ++ ins_pipe(pipe_slow); +%} + -+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (AbsI src)); ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI_reg" %} ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapIAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2L (AbsI src))); ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI2L_reg" %} ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct absL_reg(iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (AbsL src)); ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "srai t0, $src, 0x3f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "sub $dst, $dst, t0\t#absL_reg" %} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ srai(t0, as_Register($src$$reg), 0x3f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct absF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (AbsF src)); -+ -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} -+ ins_encode %{ -+ __ fabs_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); -+ %} ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(fp_uop_s); -+%} ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+instruct absD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (AbsD src)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} -+ ins_encode %{ -+ __ fabs_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + -+ ins_pipe(fp_uop_d); ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (SqrtF src)); ++// Sundry CAS operations. 
Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} -+ ins_encode %{ -+ __ fsqrt_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); -+ %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + -+ ins_pipe(fp_sqrt_s); -+%} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+instruct sqrtD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (SqrtD src)); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ %} + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ fsqrt_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_sqrt_d); ++ ins_pipe(pipe_slow); +%} + -+// Arithmetic Instructions End ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+// ============================================================================ -+// Logical Instructions ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+// Register And -+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (AndI src1 src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ andr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate And -+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (AndI src1 src2)); ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, 
#@compareAndExchangeI" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ andi(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Register Or -+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (OrI src1 src2)); ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ orr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Or -+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (OrI src1 src2)); ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ ori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Register Xor -+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (XorI src1 src2)); ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Xor -+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (XorI src1 src2)); -+ -+ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} ++instruct compareAndExchangeBAcq(iRegINoSp res, 
indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); -+ %} ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + -+// Register And Long -+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AndL src1 src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ andr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate And Long -+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AndL src1 src2)); -+ -+ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ andi(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); -+ %} ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+// Register Or Long -+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (OrL src1 src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ orr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Or Long -+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (OrL src1 src2)); ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval 
then $mem <-- $newval, #@compareAndExchangeIAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ ori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Register Xor Long -+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (XorL src1 src2)); ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Xor Long -+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (XorL src1 src2)); ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" ++ %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// BSWAP Instructions ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesI src)); -+ effect(TEMP cr); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 13); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" ++ %} + + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ 
ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (ReverseBytesL src)); -+ effect(TEMP cr); ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 29); -+ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" ++ %} + + ins_encode %{ -+ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesUS src)); ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" ++ %} + + ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesS src)); ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" ++ %} + + ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+// 
============================================================================ -+// MemBar Instruction ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+instruct load_fence() %{ -+ match(LoadFence); -+ ins_cost(ALU_COST); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + -+ format %{ "#@load_fence" %} ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" ++ %} + + ins_encode %{ -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct membar_acquire() %{ -+ match(MemBarAcquire); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ format %{ "#@membar_acquire\n\t" -+ "fence ir iorw" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" ++ %} + + ins_encode %{ -+ __ block_comment("membar_acquire"); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct membar_acquire_lock() %{ -+ match(MemBarAcquireLock); -+ ins_cost(0); ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ format %{ "#@membar_acquire_lock (elided)" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" ++ %} + + ins_encode %{ -+ __ block_comment("membar_acquire_lock (elided)"); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct store_fence() %{ -+ match(StoreFence); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "#@store_fence" %} ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ %} -+ ins_pipe(pipe_serial); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + 
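For orientation, a minimal Java sketch of the operations that end up in these compare-and-swap, compare-and-exchange and weak compare-and-swap patterns (the Counter class and its field are assumptions for illustration, not part of the port): VarHandle.compareAndSet is intrinsified to CompareAndSwap* nodes, compareAndExchange to CompareAndExchange* nodes, and weakCompareAndSetPlain to WeakCompareAndSwap* nodes; the sequentially consistent forms are, roughly, the ones that the needs_acquiring_load_reserved predicates steer to the *Acq variants.

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;

// Illustrative only: class and field names are assumptions; the point is
// which ideal nodes C2 builds for each VarHandle call.
class Counter {
    private volatile int value;

    private static final VarHandle VALUE;
    static {
        try {
            VALUE = MethodHandles.lookup()
                    .findVarHandle(Counter.class, "value", int.class);
        } catch (ReflectiveOperationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    // CompareAndSwapI: boolean result; seq_cst ordering, so typically the Acq pattern
    boolean cas(int expect, int update) {
        return VALUE.compareAndSet(this, expect, update);
    }

    // CompareAndExchangeI: returns the value observed in memory
    int cae(int expect, int update) {
        return (int) VALUE.compareAndExchange(this, expect, update);
    }

    // WeakCompareAndSwapI: relaxed ordering, may fail spuriously
    boolean weakCas(int expect, int update) {
        return VALUE.weakCompareAndSetPlain(this, expect, update);
    }
}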
-+instruct membar_release() %{ -+ match(MemBarRelease); -+ ins_cost(ALU_COST); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "#@membar_release\n\t" -+ "fence iorw ow" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ block_comment("membar_release"); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct membar_storestore() %{ -+ match(MemBarStoreStore); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "MEMBAR-store-store\t#@membar_storestore" %} ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ membar(MacroAssembler::StoreStore); -+ %} -+ ins_pipe(pipe_serial); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + -+instruct membar_release_lock() %{ -+ match(MemBarReleaseLock); -+ ins_cost(0); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "#@membar_release_lock (elided)" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ block_comment("membar_release_lock (elided)"); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct membar_volatile() %{ -+ match(MemBarVolatile); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "#@membar_volatile\n\t" -+ "fence iorw iorw"%} ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ %} + + ins_encode %{ -+ __ block_comment("membar_volatile"); -+ __ membar(MacroAssembler::StoreLoad); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// Cast Instructions (Java-level type cast) ++instruct weakCompareAndSwapLAcq(iRegINoSp res, 
indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct castX2P(iRegPNoSp dst, iRegL src) %{ -+ match(Set dst (CastX2P src)); ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct castP2X(iRegLNoSp dst, iRegP src) %{ -+ match(Set dst (CastP2X src)); ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct castPP(iRegPNoSp dst) ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set dst (CastPP dst)); -+ ins_cost(0); ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+ size(0); -+ format %{ "# castPP of $dst, #@castPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); -+%} ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+instruct castII(iRegI dst) -+%{ -+ match(Set dst (CastII dst)); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + -+ size(0); -+ format %{ "# castII of $dst, #@castII" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" ++ %} + -+instruct checkCastPP(iRegPNoSp dst) -+%{ -+ match(Set dst (CheckCastPP dst)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + -+ size(0); -+ ins_cost(0); -+ format %{ "# checkcastPP of $dst, #@checkCastPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); ++ ins_pipe(pipe_slow); +%} + -+// 
============================================================================ -+// Convert Instructions -+ -+// int to bool -+instruct convI2Bool(iRegINoSp dst, iRegI src) ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ match(Set dst (Conv2B src)); ++ match(Set prev (GetAndSetI mem newv)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convI2Bool" %} ++ ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// pointer to bool -+instruct convP2Bool(iRegINoSp dst, iRegP src) ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ match(Set dst (Conv2B src)); ++ match(Set prev (GetAndSetL mem newv)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convP2Bool" %} ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// int <-> long -+ -+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) +%{ -+ match(Set dst (ConvI2L src)); ++ match(Set prev (GetAndSetN mem newv)); + + ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} -+ ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); -+ %} -+ ins_pipe(ialu_reg); -+%} -+ -+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ -+ match(Set dst (ConvL2I src)); + -+ ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// int to unsigned long (Zero-extend) -+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ match(Set dst (AndL (ConvI2L src) mask)); ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set prev (GetAndSetP mem newv)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ -+ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_serial); +%} + -+// float <-> double ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct convF2D_reg(fRegD dst, fRegF src) %{ -+ match(Set dst (ConvF2D src)); ++ match(Set prev (GetAndSetI mem newv)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ -+ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_f2d); ++ 
ins_pipe(pipe_serial); +%} + -+instruct convD2F_reg(fRegF dst, fRegD src) %{ -+ match(Set dst (ConvD2F src)); ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} ++ match(Set prev (GetAndSetL mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ -+ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_d2f); ++ ins_pipe(pipe_serial); +%} + -+// float <-> int ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2I src)); ++ match(Set prev (GetAndSetN mem newv)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ -+ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_f2i); ++ ins_pipe(pipe_serial); +%} + -+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2F src)); ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} ++ match(Set prev (GetAndSetP mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + ins_encode %{ -+ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_i2f); ++ ins_pipe(pipe_serial); +%} + -+// float <-> long ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2L src)); ++ ins_cost(ALU_COST); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ -+ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_f2l); ++ ins_pipe(pipe_serial); +%} + -+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ -+ match(Set dst (ConvL2F src)); ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ -+ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_l2f); ++ ins_pipe(pipe_serial); +%} + -+// double <-> int ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ -+ 
match(Set dst (ConvD2I src)); ++ ins_cost(ALU_COST); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ -+ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_d2i); ++ ins_pipe(pipe_serial); +%} + -+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2D src)); ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ -+ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_i2d); ++ ins_pipe(pipe_serial); +%} + -+// double <-> long ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); + -+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ match(Set dst (ConvD2L src)); ++ ins_cost(ALU_COST); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ -+ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_d2l); ++ ins_pipe(pipe_serial); +%} + -+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ -+ match(Set dst (ConvL2D src)); -+ -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} -+ -+ ins_encode %{ -+ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_pipe(fp_l2d); -+%} ++ match(Set dummy (GetAndAddI mem incr)); + -+// Convert oop into int for vectors alignment masking -+instruct convP2I(iRegINoSp dst, iRegP src) %{ -+ match(Set dst (ConvL2I (CastP2X src))); ++ ins_cost(ALU_COST); + -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ -+ __ zero_extend($dst$$Register, $src$$Register, 32); ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// Convert compressed oop into int for vectors alignment masking -+// in case of 32bit oops (heap < 4Gb). 
-+instruct convN2I(iRegINoSp dst, iRegN src) ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) +%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++ match(Set newval (GetAndAddI mem incr)); + + ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ++ ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} + + ins_encode %{ -+ __ mv($dst$$Register, $src$$Register); ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// Convert oop pointer into compressed form -+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodeP src)); -+ ins_cost(ALU_COST); -+ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} -+ ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ encode_heap_oop(d, s); -+ %} -+ ins_pipe(ialu_reg); -+%} ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && -+ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(0); -+ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} -+ ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop(d, s); -+ %} -+ ins_pipe(ialu_reg); -+%} ++ ins_cost(ALU_COST); + -+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || -+ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + -+ ins_cost(0); -+ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} + ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop_not_null(d, s); ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} -+ ins_pipe(ialu_reg); ++ ++ ins_pipe(pipe_serial); +%} + -+// Convert klass pointer into compressed form. 
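The patterns above cover compressed oops (EncodeP / DecodeN); the ones below cover compressed klass pointers (EncodePKlass / DecodeNKlass). A minimal Java sketch of code whose compiled form exercises the oop variants, assuming UseCompressedOops is enabled (the Node type is an assumption for illustration):

// Illustrative only: with compressed oops, a reference-field load is a LoadN
// followed by DecodeN (decodeHeapOop above), and a reference-field store is
// an EncodeP feeding a StoreN (encodeHeapOop above).
class Node {
    Node next;

    static Node head(Node n) {
        return n.next;      // LoadN + DecodeN
    }

    static void link(Node a, Node b) {
        a.next = b;         // EncodeP + StoreN
    }
}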
-+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodePKlass src)); ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); -+ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ encode_klass_not_null(dst_reg, src_reg, t0); ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(!maybe_use_tmp_register_decoding_klass()); ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ match(Set dst (DecodeNKlass src)); ++ match(Set dummy (GetAndAddL mem incr)); + + ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? xheapbase : t0); ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ -+ predicate(maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ effect(TEMP tmp); ++ match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ Register tmp_reg = as_Register($tmp$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// stack <-> reg and reg <-> reg shuffles with no conversion -+ -+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ -+ -+ match(Set dst (MoveF2I src)); ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set dummy (GetAndAddL mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ -+ -+ match(Set dst (MoveI2F src)); ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ 
predicate(needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ -+ -+ match(Set dst (MoveD2L src)); ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + + ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ -+ -+ match(Set dst (MoveL2D src)); ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + + ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ -+ -+ match(Set dst (MoveF2I src)); ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(STORE_COST); ++ ins_cost(ALU_COST); + -+ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + + ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); ++// ============================================================================ ++// Arithmetic Instructions ++// + -+ ins_cost(STORE_COST); ++// Integer Addition + -+ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ++// TODO ++// these currently employ operations which do not set CR and hence are ++// not flagged as killing CR but we would like to isolate the cases ++// where we want to set flags from those where we don't. need to work ++// out how to do that. 
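As a concrete illustration of how the addition patterns below are selected, a minimal Java sketch (the class and method names are assumptions, not part of the port): int addition becomes a C2 AddI node; the matcher picks addI_reg_reg (addw) for two register operands and addI_reg_imm (addiw) when the constant fits the immIAdd operand, i.e. the 12-bit signed immediate that addiw accepts.

class AddExample {
    static int sum(int a, int b) {
        int c = a + b;   // AddI(a, b)       -> addI_reg_reg (addw)
        return c + 42;   // AddI(c, ConI 42) -> addI_reg_imm (addiw)
    }
}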
++instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} + + ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(STORE_COST); ++instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ ++ match(Set dst (AddI src1 src2)); + -+ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} + + ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ int32_t con = (int32_t)$src2$$constant; ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ -+ -+ match(Set dst (MoveL2D src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(STORE_COST); ++instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ ++ match(Set dst (AddI (ConvL2I src1) src2)); + -+ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} + + ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ -+ -+ match(Set dst (MoveF2I src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Pointer Addition ++instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ ++ match(Set dst (AddP src1 src2)); + -+ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} + + ins_encode %{ -+ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_f2i); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); -+ -+ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++// If we shift more than 32 bits, we need not convert I2L. ++instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ ++ match(Set dst (LShiftL (ConvI2L src) scale)); ++ ins_cost(ALU_COST); ++ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} + + ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); + %} + -+ ins_pipe(fp_i2f); -+ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); ++// Pointer Immediate Addition ++// n.b. 
this needs to be more expensive than using an indirect memory ++// operand ++instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} + -+ effect(DEF dst, USE src); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} + -+ ins_cost(XFER_COST); ++ ins_pipe(ialu_reg_imm); ++%} + -+ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++// Long Addition ++instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} + + ins_encode %{ -+ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_d2l); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ ++// No constant pool entries requiredLong Immediate Addition. ++instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} + -+ match(Set dst (MoveL2D src)); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} + -+ effect(DEF dst, USE src); ++ ins_pipe(ialu_reg_imm); ++%} + -+ ins_cost(XFER_COST); ++// Integer Subtraction ++instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); + -+ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ ins_cost(ALU_COST); ++ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} + + ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ subw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_l2d); ++ ins_pipe(ialu_reg_reg); +%} + -+// ============================================================================ -+// Compare Instructions which set the result float comparisons in dest register. -+ -+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) -+%{ -+ match(Set dst (CmpF3 op1 op2)); ++// Immediate Subtraction ++instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ ++ match(Set dst (SubI src1 src2)); + -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.s $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} + + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
-+ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addiw ++ __ subw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) -+%{ -+ match(Set dst (CmpD3 op1 op2)); -+ -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.d $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" -+ %} ++// Long Subtraction ++instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} + + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. -+ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sub(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) -+%{ -+ match(Set dst (CmpL3 op1 op2)); ++// No constant pool entries requiredLong Immediate Subtraction. ++instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} + -+ ins_cost(ALU_COST * 3 + BRANCH_COST); -+ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" -+ "bnez $dst, done\n\t" -+ "slt $dst, $op1, $op2\n\t" -+ "neg $dst, $dst\t#@cmpL3_reg_reg" -+ %} + ins_encode %{ -+ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); -+ __ mv(as_Register($dst$$reg), t0); ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ sub(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) -+%{ -+ match(Set dst (CmpLTMask p q)); -+ -+ ins_cost(2 * ALU_COST); ++// Integer Negation (special case for sub) + -+ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" -+ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" -+ %} ++instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(ALU_COST); ++ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} + + ins_encode %{ -+ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); -+ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); ++ // actually call the subw ++ __ negw(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) -+%{ -+ match(Set dst (CmpLTMask op zero)); ++// Long Negation + ++instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ ++ match(Set dst (SubL zero src)); + ins_cost(ALU_COST); -+ -+ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} + + ins_encode %{ -+ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); ++ // actually call the sub ++ __ 
neg(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(ialu_reg); +%} + ++// Integer Multiply + -+// ============================================================================ -+// Max and Min -+ -+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MinI src1 src2)); -+ -+ effect(DEF dst, USE src1, USE src2); -+ -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@minI_rReg" -+ %} ++instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} + ++ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); ++ // riscv64 mulw will sign-extension to high 32 bits in dst reg ++ __ mulw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(imul_reg_reg); +%} + -+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MaxI src1 src2)); -+ -+ effect(DEF dst, USE src1, USE src2); ++// Long Multiply + -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@maxI_rReg" -+ %} ++instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mul $dst, $src1, $src2\t#@mulL" %} + + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ ++ __ mul(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(lmul_reg_reg); +%} + -+// ============================================================================ -+// Branch Instructions -+// Direct Branch. -+instruct branch(label lbl) ++instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) +%{ -+ match(Goto); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "j $lbl\t#@branch" %} ++ match(Set dst (MulHiL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} + -+ ins_encode(riscv_enc_j(lbl)); ++ ins_encode %{ ++ __ mulh(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(lmul_reg_reg); +%} + -+// ============================================================================ -+// Compare and Branch Instructions ++// Integer Divide + -+// Patterns for short (< 12KiB) variants ++instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "divw $dst, $src1, $src2\t#@divI"%} + -+// Compare flags and branch near instructions. 
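The near patterns that follow carry ins_short_branch(1) and share their match rules with far_* variants, as the comment in each one notes; C2's branch shortening keeps the near form when the target lies within the short range and otherwise falls back to the far variant, which typically tests the inverted condition and takes an unconditional jump. A minimal Java sketch of code that reaches the CmpI-based patterns (the class and method names are assumptions for illustration):

class LoopExample {
    static int count(int n) {
        int s = 0;
        for (int i = 0; i < n; i++) {   // back edge: CountedLoopEnd (CmpI i n) -> cmpI_loop or far_cmpI_loop
            s += i;
        }
        return s;
    }

    static int classify(int a, int b) {
        if (a < b) {                    // If (CmpI a b); when kept as a branch -> cmpI_branch or far_cmpI_branch
            return -1;
        }
        return 1;
    }
}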
-+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++ ins_encode(riscv_enc_divw(dst, src1, src2)); ++ ins_pipe(idiv_reg_reg); ++%} + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} ++instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ ++ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); ++ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); + %} -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare signed int and branch near instructions -+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_branch'. -+ match(If cmp (CmpI op1 op2)); ++// Long Divide + -+ effect(USE lbl); ++instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "div $dst, $src1, $src2\t#@divL" %} + -+ ins_cost(BRANCH_COST); ++ ins_encode(riscv_enc_div(dst, src1, src2)); ++ ins_pipe(ldiv_reg_reg); ++%} + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} ++instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ ++ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_loop'. -+ match(CountedLoopEnd cmp (CmpI op1 op2)); ++// Integer Remainder + -+ effect(USE lbl); ++instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "remw $dst, $src1, $src2\t#@modI" %} + -+ ins_cost(BRANCH_COST); ++ ins_encode(riscv_enc_modw(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); ++%} + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} ++// Long Remainder + -+ ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); -+ %} ++instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "rem $dst, $src1, $src2\t#@modL" %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_encode(riscv_enc_mod(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); +%} + -+// Compare unsigned int and branch near instructions -+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_branch'. 
-+ match(If cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Integer Shifts + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} ++// Shift Left Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ sllw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} ++// Shift Left Immediate ++instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ // the shift amount is encoded in the lower ++ // 5 bits of the I-immediate field for RV32I ++ __ slliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare signed long and branch near instructions -+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_branch'. -+ match(If cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} ++// Shift Right Logical Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ srlw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_loop'. 
-+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} ++// Shift Right Logical Immediate ++instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (URShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare unsigned long and branch near instructions -+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_branch'. -+ match(If cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} ++// Shift Right Arithmetic Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ // riscv will sign-ext dst high 32 bits ++ __ sraw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} ++// Shift Right Arithmetic Immediate ++instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ // riscv will sign-ext dst high 32 bits ++ __ sraiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare pointer and branch near instructions -+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_branch'. 
-+ match(If cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); ++// Long Shifts + -+ ins_cost(BRANCH_COST); ++// Shift Left Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ sll(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_loop'. -+ match(CountedLoopEnd cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Shift Left Immediate ++instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (LShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ slli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare narrow pointer and branch near instructions -+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ // Same match rule as `far_cmpN_branch'. -+ match(If cmp (CmpN op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Shift Right Logical Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ srl(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ // Same match rule as `far_cmpN_loop'. 
-+ match(CountedLoopEnd cmp (CmpN op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Shift Right Logical Immediate ++instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare float and branch near instructions -+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ // Same match rule as `far_cmpF_branch'. -+ match(If cmp (CmpF op1 op2)); -+ -+ effect(USE lbl); ++// A special-case pattern for card table stores. ++instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ ++ match(Set dst (URShiftL (CastP2X src1) src2)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ // Same match rule as `far_cmpF_loop'. -+ match(CountedLoopEnd cmp (CmpF op1 op2)); -+ effect(USE lbl); ++// Shift Right Arithmetic Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftL src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} ++ ins_cost(ALU_COST); ++ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ sra(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+// Compare double and branch near instructions -+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ // Same match rule as `far_cmpD_branch'. 
-+ match(If cmp (CmpD op1 op2)); -+ effect(USE lbl); ++// Shift Right Arithmetic Immediate ++instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (RShiftL src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srai(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ // Same match rule as `far_cmpD_loop'. -+ match(CountedLoopEnd cmp (CmpD op1 op2)); -+ effect(USE lbl); -+ -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} ++instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ ++ match(Set dst (XorI src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare signed int with zero and branch near instructions -+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_branch'. -+ match(If cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} ++instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ ++ match(Set dst (XorL src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ effect(USE op1, USE lbl); ++// ============================================================================ ++// Floating Point Arithmetic Instructions + -+ ins_cost(BRANCH_COST); ++instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (AddF src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+// Compare unsigned int with zero and branch near instructions -+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (AddD src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} ++instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (SubF src1 src2)); + ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+// Compare signed long with zero and branch near instructions -+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_branch'. -+ match(If cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (SubD src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MulF src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fmul_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+// Compare unsigned long with zero and branch near instructions -+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MulD src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmul_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// src1 * src2 + src3 ++instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Compare pointer with zero and branch near instructions -+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_branch'. 
-+ match(If cmp (CmpP op1 zero)); -+ effect(USE lbl); ++// src1 * src2 + src3 ++instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpP op1 zero)); -+ effect(USE lbl); ++// src1 * src2 - src3 ++instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Compare narrow pointer with zero and branch near instructions -+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_branch'. -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// src1 * src2 - src3 ++instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// -src1 * src2 + src3 ++instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fnmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Compare narrow pointer with pointer zero and branch near instructions -+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. -+ match(If cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++// -src1 * src2 + src3 ++instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); -+ %} -+ -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ __ fnmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++// -src1 * src2 - src3 ++instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fnmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Patterns for far (20KiB) variants -+ -+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++// -src1 * src2 - src3 ++instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); + -+ ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); ++ __ fnmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_class_default); +%} + -+// Compare signed int and branch far instructions -+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpI op1 op2)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Math.max(FF)F ++instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MaxF src1 src2)); ++ effect(TEMP_DEF dst); + -+ // the format instruction [far_b$cmp] here is be used as two insructions -+ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} ++ format %{ "maxF $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpI op1 op2)); -+ effect(USE lbl); ++// Math.min(FF)F ++instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MinF src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++ format %{ "minF $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ 
ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpU op1 op2)); -+ effect(USE lbl); ++// Math.max(DD)D ++instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MaxD src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ format %{ "maxD $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ effect(USE lbl); ++// Math.min(DD)D ++instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MinD src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ format %{ "minD $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (DivF src1 src2)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fdiv_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_div_s); +%} + -+instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (DivD src1 src2)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fdiv_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_div_d); +%} + -+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct negF_reg_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (NegF src)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fneg_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_uop_s); +%} + -+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct negD_reg_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (NegD src)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fneg_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_uop_d); +%} + -+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(If cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (AbsI src)); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "sraiw t0, $src, 0x1f\n\t" ++ "addw $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absI_reg" ++ %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ sraiw(t0, as_Register($src$$reg), 0x1f); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct absL_reg(iRegLNoSp dst, iRegL src) %{ ++ match(Set dst (AbsL src)); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "srai t0, $src, 0x3f\n\t" ++ "add $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absL_reg" ++ %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ srai(t0, as_Register($src$$reg), 0x3f); ++ __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(If cmp (CmpN op1 op2)); ++instruct absF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (AbsF src)); + -+ effect(USE lbl); ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} ++ ins_encode %{ ++ __ fabs_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(BRANCH_COST * 2); ++ ins_pipe(fp_uop_s); ++%} + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++instruct absD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (AbsD src)); + ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), 
*($lbl$$label), /* is_far */ true); ++ __ fabs_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_uop_d); +%} + -+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpN op1 op2)); ++instruct sqrtF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + -+ effect(USE lbl); ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} ++ ins_encode %{ ++ __ fsqrt_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(BRANCH_COST * 2); ++ ins_pipe(fp_sqrt_s); ++%} + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++instruct sqrtD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (SqrtD src)); + ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fsqrt_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_sqrt_d); +%} + -+// Float compare and branch instructions -+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ match(If cmp (CmpF op1 op2)); ++// Arithmetic Instructions End + -+ effect(USE lbl); ++// ============================================================================ ++// Logical Instructions + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} ++// Register And ++instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (AndI src1 src2)); + ++ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -+ *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpF op1 op2)); -+ effect(USE lbl); ++// Immediate And ++instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (AndI src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} ++ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -+ *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_imm); +%} + -+// Double compare and branch instructions -+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ match(If cmp (CmpD op1 op2)); -+ effect(USE lbl); ++// Register Or ++instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} ++ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | 
MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpD op1 op2)); -+ effect(USE lbl); ++// Immediate Or ++instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} ++ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(If cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Register Xor ++instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Immediate Xor ++instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} ++ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Register And Long ++instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AndL src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt 
cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); -+ -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++// Immediate And Long ++instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AndL src1 src2)); + ++ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_imm); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Register Or Long ++instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); ++// Immediate Or Long ++instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ effect(USE op1, USE lbl); ++ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} + -+ ins_cost(BRANCH_COST); ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} ++ ins_pipe(ialu_reg_imm); ++%} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++// Register Xor Long ++instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (XorL src1 src2)); + -+ ins_pipe(pipe_cmpz_branch); -+%} ++ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} + -+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpL op1 zero)); ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ effect(USE op1, USE lbl); ++ ins_pipe(ialu_reg_reg); ++%} + -+ ins_cost(BRANCH_COST * 2); ++// Immediate Xor Long ++instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (XorL src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 
zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// ============================================================================ ++// BSWAP Instructions + -+ ins_cost(BRANCH_COST * 2); ++instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesI src)); ++ effect(TEMP cr); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} ++ ins_cost(ALU_COST * 13); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesL src)); ++ effect(TEMP cr); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(ALU_COST * 29); ++ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode %{ ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} -+ -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); -+ -+ ins_pipe(pipe_cmpz_branch); -+%} ++// ============================================================================ ++// MemBar Instruction + -+instruct 
far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP op1 zero)); -+ effect(USE lbl); ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ format %{ "#@load_fence" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP op1 zero)); -+ effect(USE lbl); ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} ++ format %{ "#@membar_acquire\n\t" ++ "fence ir iorw" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ block_comment("membar_acquire"); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct membar_acquire_lock() %{ ++ match(MemBarAcquireLock); ++ ins_cost(0); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ format %{ "#@membar_acquire_lock (elided)" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ block_comment("membar_acquire_lock (elided)"); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(ALU_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ format %{ "#@store_fence" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ format %{ "#@membar_release\n\t" ++ "fence iorw ow" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ block_comment("membar_release"); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ match(StoreStoreFence); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, 
zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} ++ format %{ "MEMBAR-store-store\t#@membar_storestore" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ membar(MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+// ============================================================================ -+// Conditional Move Instructions -+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct membar_release_lock() %{ ++ match(MemBarReleaseLock); ++ ins_cost(0); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ "#@membar_release_lock (elided)" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ block_comment("membar_release_lock (elided)"); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(ALU_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ "#@membar_volatile\n\t" ++ "fence iorw iorw"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ block_comment("membar_volatile"); ++ __ membar(MacroAssembler::StoreLoad); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} -+ -+ ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++// ============================================================================ ++// Cast Instructions (Java-level type cast) + -+ ins_pipe(pipe_slow); -+%} ++instruct castX2P(iRegPNoSp dst, iRegL src) %{ ++ match(Set dst (CastX2P src)); + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1 $op2, skip\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ 
ins_cost(ALU_COST + BRANCH_COST); ++instruct castP2X(iRegLNoSp dst, iRegP src) %{ ++ match(Set dst (CastP2X src)); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct castPP(iRegPNoSp dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ins_cost(0); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ size(0); ++ format %{ "# castPP of $dst, #@castPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct castLL(iRegL dst) ++%{ ++ match(Set dst (CastLL dst)); + -+ ins_pipe(pipe_slow); ++ size(0); ++ format %{ "# castLL of $dst, #@castLL" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); +%} + ++instruct castII(iRegI dst) ++%{ ++ match(Set dst (CastII dst)); + -+// ============================================================================ -+// Procedure Call/Return Instructions -+ -+// Call Java Static Instruction ++ size(0); ++ format %{ "# castII of $dst, #@castII" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} + -+instruct CallStaticJavaDirect(method meth) ++instruct checkCastPP(iRegPNoSp dst) +%{ -+ match(CallStaticJava); ++ match(Set dst (CheckCastPP dst)); + -+ effect(USE meth); ++ size(0); ++ ins_cost(0); ++ format %{ "# checkcastPP of $dst, #@checkCastPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_cost(BRANCH_COST); ++instruct castFF(fRegF dst) ++%{ ++ match(Set dst (CastFF dst)); + -+ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_encode( riscv_enc_java_static_call(meth), -+ riscv_enc_call_epilog ); ++instruct castDD(fRegD dst) ++%{ ++ match(Set dst (CastDD dst)); + -+ ins_pipe(pipe_class_call); ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); +%} + -+// TO HERE -+ -+// Call Java Dynamic Instruction -+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++instruct castVV(vReg dst) +%{ -+ match(CallDynamicJava); ++ match(Set dst (CastVV dst)); + -+ effect(USE meth, KILL cr); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_cost(BRANCH_COST + ALU_COST * 6); ++// ============================================================================ ++// Convert Instructions + -+ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++// int to bool ++instruct convI2Bool(iRegINoSp dst, 
iRegI src) ++%{ ++ match(Set dst (Conv2B src)); + -+ ins_encode( riscv_enc_java_dynamic_call(meth), -+ riscv_enc_call_epilog ); ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convI2Bool" %} + -+ ins_pipe(pipe_class_call); -+%} ++ ins_encode %{ ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+// Call Runtime Instruction ++ ins_pipe(ialu_reg); ++%} + -+instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++// pointer to bool ++instruct convP2Bool(iRegINoSp dst, iRegP src) +%{ -+ match(CallRuntime); -+ -+ effect(USE meth, KILL cr); -+ -+ ins_cost(BRANCH_COST); ++ match(Set dst (Conv2B src)); + -+ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convP2Bool" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(ialu_reg); +%} + -+// Call Runtime Instruction ++// int <-> long + -+instruct CallLeafDirect(method meth, rFlagsReg cr) ++instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) +%{ -+ match(CallLeaf); ++ match(Set dst (ConvI2L src)); + -+ effect(USE meth, KILL cr); ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ %} ++ ins_pipe(ialu_reg); ++%} + -+ ins_cost(BRANCH_COST); ++instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ++ match(Set dst (ConvL2I src)); + -+ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(ialu_reg); +%} + -+// Call Runtime Instruction -+ -+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++// int to unsigned long (Zero-extend) ++instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) +%{ -+ match(CallLeafNoFP); ++ match(Set dst (AndL (ConvI2L src) mask)); + -+ effect(USE meth, KILL cr); ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} + -+ ins_cost(BRANCH_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ %} + -+ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ins_pipe(ialu_reg_shift); ++%} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++// float <-> double + -+ ins_pipe(pipe_class_call); -+%} ++instruct convF2D_reg(fRegD dst, fRegF src) %{ ++ match(Set dst (ConvF2D src)); + -+// ============================================================================ -+// Partial Subtype Check -+// -+// superklass array for an instance of the superklass. Set a hidden -+// internal cache on a hit (cache is checked with exposed code in -+// gen_subtype_check()). Return zero for a hit. The encoding -+// ALSO sets flags. 
++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} + -+instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) -+%{ -+ match(Set result (PartialSubtypeCheck sub super)); -+ effect(KILL temp, KILL cr); ++ ins_encode %{ ++ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ ins_pipe(fp_f2d); ++%} + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++instruct convD2F_reg(fRegF dst, fRegD src) %{ ++ match(Set dst (ConvD2F src)); + -+ opcode(0x1); // Force zero of result reg on hit ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} + -+ ins_pipe(pipe_class_memory); ++ ins_encode %{ ++ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_d2f); +%} + -+instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, -+ immP0 zero, rFlagsReg cr) -+%{ -+ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); -+ effect(KILL temp, KILL result); ++// float <-> int + -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} ++instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2I src)); + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} + -+ opcode(0x0); // Don't zero result reg on hit ++ ins_encode %{ ++ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(fp_f2i); +%} + -+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UU); ++ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_i2f); +%} + -+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++// float <-> long ++ ++instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LL); ++ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_f2l); +%} + -+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ ++ match(Set dst (ConvL2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UL); ++ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_l2f); +%} + -+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, -+ rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++// double <-> int ++ ++instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LU); ++ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_d2i); +%} + -+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 
cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UU); ++ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_i2d); +%} + -+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++// double <-> long ++ ++instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::LL); ++ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_d2l); +%} + -+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ ++ match(Set dst (ConvL2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UL); ++ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_l2d); +%} + -+instruct 
string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++// Convert oop into int for vectors alignment masking ++instruct convP2I(iRegINoSp dst, iRegP src) %{ ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend($dst$$Register, $src$$Register, 32); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++// Convert compressed oop into int for vectors alignment masking ++// in case of 32bit oops (heap < 4Gb). ++instruct convN2I(iRegINoSp dst, iRegN src) +%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ predicate(CompressedOops::shift() == 0); ++ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ mv($dst$$Register, $src$$Register); + %} -+ ins_pipe(pipe_class_memory); -+%} + -+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ ins_pipe(ialu_reg); ++%} + ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodeP src)); ++ ins_cost(ALU_COST); ++ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ 
$tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ encode_heap_oop(d, s); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg); +%} + -+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); + ++ ins_cost(0); ++ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, false /* isU */) ; ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop(d, s); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg); +%} + ++instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ ins_cost(0); ++ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop_not_null(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} + -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++// Convert klass pointer into compressed form. 
++instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} + + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ __ encode_klass_not_null(dst_reg, src_reg, t0); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+// clearing of an array -+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, KILL cr); ++instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ ++ match(Set dst (DecodeNKlass src)); + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ effect(TEMP tmp); ++ ++ ins_cost(ALU_COST); ++ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} + + ins_encode %{ -+ address tpc = __ zero_words($base$$Register, $cnt$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ Register tmp_reg = as_Register($tmp$$reg); ++ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg); +%} + -+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL base, KILL cr); ++// stack <-> reg and reg <-> reg shuffles with no conversion + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} + + ins_encode %{ -+ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ ++ ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} ++ + ins_pipe(pipe_class_memory); ++ +%} + -+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ ++ ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ ++ ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} ++ + ins_pipe(pipe_class_memory); ++ +%} + -+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} ++ + ins_pipe(pipe_class_memory); ++ +%} + -+// ============================================================================ -+// Safepoint Instructions ++instruct 
MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ + -+instruct safePoint(iRegP poll) -+%{ -+ match(SafePoint poll); ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} + -+ ins_cost(2 * LOAD_COST); -+ format %{ -+ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" -+ %} + ins_encode %{ -+ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} -+ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); ++ ++ ins_pipe(istore_reg_reg); ++ +%} + -+// ============================================================================ -+// This name is KNOWN by the ADLC and cannot be changed. -+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type -+// for this guy. -+instruct tlsLoadP(javaThread_RegP dst) -+%{ -+ match(Set dst (ThreadLocal)); ++instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ + -+ ins_cost(0); ++ match(Set dst (MoveD2L src)); + -+ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ effect(DEF dst, USE src); + -+ size(0); ++ ins_cost(STORE_COST); + -+ ins_encode( /*empty*/ ); ++ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ %} ++ ++ ins_pipe(pipe_class_memory); + -+ ins_pipe(pipe_class_empty); +%} + -+// inlined locking and unlocking -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) -+%{ -+ match(Set cr (FastLock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ + -+ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); -+ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} ++ match(Set dst (MoveL2D src)); + -+ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2)); ++ effect(DEF dst, USE src); + -+ ins_pipe(pipe_serial); -+%} ++ ins_cost(STORE_COST); + -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) -+%{ -+ match(Set cr (FastUnlock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} + -+ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); -+ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ %} + -+ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2)); ++ ins_pipe(istore_reg_reg); + -+ ins_pipe(pipe_serial); +%} + -+// Tail Call; Jump from runtime stub to Java code. -+// Also known as an 'interprocedural jump'. -+// Target of jump will eventually return to caller. -+// TailJump below removes the return address. -+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) -+%{ -+ match(TailCall jump_target method_oop); ++instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ + -+ ins_cost(BRANCH_COST); ++ match(Set dst (MoveF2I src)); + -+ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." 
%} ++ effect(DEF dst, USE src); + -+ ins_encode(riscv_enc_tail_call(jump_target)); ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ ++ ins_encode %{ ++ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_f2i); + -+ ins_pipe(pipe_class_call); +%} + -+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) -+%{ -+ match(TailJump jump_target ex_oop); ++instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ + -+ ins_cost(ALU_COST + BRANCH_COST); ++ match(Set dst (MoveI2F src)); + -+ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ effect(DEF dst, USE src); + -+ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_i2f); + -+ ins_pipe(pipe_class_call); +%} + -+// Create exception oop: created by stack-crawling runtime code. -+// Created exception is now available to this handler, and is setup -+// just prior to jumping to this handler. No code emitted. -+instruct CreateException(iRegP_R10 ex_oop) -+%{ -+ match(Set ex_oop (CreateEx)); ++instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ + -+ ins_cost(0); -+ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ match(Set dst (MoveD2L src)); + -+ size(0); ++ effect(DEF dst, USE src); + -+ ins_encode( /*empty*/ ); ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_d2l); + -+ ins_pipe(pipe_class_empty); +%} + -+// Rethrow exception: The exception oop will come in the first -+// argument position. Then JUMP (not call) to the rethrow stub code. -+instruct RethrowException() -+%{ -+ match(Rethrow); ++instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ + -+ ins_cost(BRANCH_COST); ++ match(Set dst (MoveL2D src)); + -+ format %{ "j rethrow_stub\t#@RethrowException" %} ++ effect(DEF dst, USE src); + -+ ins_encode( riscv_enc_rethrow() ); ++ ins_cost(XFER_COST); + -+ ins_pipe(pipe_class_call); ++ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_l2d); +%} + -+// Return Instruction -+// epilog node loads ret address into ra as part of frame pop -+instruct Ret() ++// ============================================================================ ++// Compare Instructions which set the result float comparisons in dest register. ++ ++instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) +%{ -+ match(Return); ++ match(Set dst (CmpF3 op1 op2)); + -+ ins_cost(BRANCH_COST); -+ format %{ "ret\t// return register, #@Ret" %} ++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); ++ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" ++ "bgtz $dst, done\n\t" ++ "feq.s $dst, $op1, $op2\n\t" ++ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" ++ %} + -+ ins_encode(riscv_enc_ret()); ++ ins_encode %{ ++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. ++ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(pipe_class_default); +%} + -+// Die now. 
-+instruct ShouldNotReachHere() %{ -+ match(Halt); -+ -+ ins_cost(BRANCH_COST); ++instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) ++%{ ++ match(Set dst (CmpD3 op1 op2)); + -+ format %{ "#@ShouldNotReachHere" %} ++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); ++ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" ++ "bgtz $dst, done\n\t" ++ "feq.d $dst, $op1, $op2\n\t" ++ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" ++ %} + + ins_encode %{ -+ if (is_reachable()) { -+ __ halt(); -+ } ++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. ++ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); + %} + + ins_pipe(pipe_class_default); +%} + ++instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) ++%{ ++ match(Set dst (CmpL3 op1 op2)); + -+//----------PEEPHOLE RULES----------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+// -+// peepmatch ( root_instr_name [preceding_instruction]* ); -+// -+// peepconstraint %{ -+// (instruction_number.operand_name relational_op instruction_number.operand_name -+// [, ...] ); -+// // instruction numbers are zero-based using left to right order in peepmatch -+// -+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); -+// // provide an instruction_number.operand_name for each operand that appears -+// // in the replacement instruction's match rule -+// -+// ---------VM FLAGS--------------------------------------------------------- -+// -+// All peephole optimizations can be turned off using -XX:-OptoPeephole -+// -+// Each peephole rule is given an identifying number starting with zero and -+// increasing by one in the order seen by the parser. An individual peephole -+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# -+// on the command-line. -+// -+// ---------CURRENT LIMITATIONS---------------------------------------------- -+// -+// Only match adjacent instructions in same basic block -+// Only equality constraints -+// Only constraints between operands, not (0.dest_reg == RAX_enc) -+// Only one replacement instruction -+// -+//----------SMARTSPILL RULES--------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+ -+// Local Variables: -+// mode: c++ -+// End: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -new file mode 100644 -index 000000000..6f7055a39 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,605 @@ -+// -+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). 
-+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// -+ -+// RISCV Bit-Manipulation Extension Architecture Description File -+ -+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{ -+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32)); -+ effect(DEF dst, USE src); -+ -+ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %} -+ -+ ins_cost(ALU_COST); ++ ins_cost(ALU_COST * 3 + BRANCH_COST); ++ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" ++ "bnez $dst, done\n\t" ++ "slt $dst, $op1, $op2\n\t" ++ "neg $dst, $dst\t#@cmpL3_reg_reg" ++ %} + ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f); ++ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); ++ __ mv(as_Register($dst$$reg), t0); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_class_default); +%} + -+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{ -+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64)); -+ effect(DEF dst, USE src); ++instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) ++%{ ++ match(Set dst (CmpLTMask p q)); + -+ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %} ++ ins_cost(2 * ALU_COST); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f); ++ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" ++ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" + %} + -+ ins_pipe(ialu_reg_shift); -+%} -+ -+// ror expander -+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); -+ -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); ++ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + -+// ror expander -+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) ++%{ ++ match(Set dst (CmpLTMask op zero)); + -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} + ins_cost(ALU_COST); ++ ++ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ + ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); + %} -+ ins_pipe(ialu_reg_reg); ++ ++ ins_pipe(ialu_reg_shift); +%} + + -+instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift)))); ++// ============================================================================ ++// Max and Min + -+ expand %{ -+ rorI_reg_b(dst, src, shift); -+ %} -+%} ++instruct minI_rReg(iRegINoSp dst, 
iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MinI src1 src2)); + -+instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift)))); ++ effect(DEF dst, USE src1, USE src2); + -+ expand %{ -+ rorI_reg_b(dst, src, shift); ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@minI_rReg" + %} -+%} -+ -+instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift)))); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); ++ ins_encode %{ ++ Label Lsrc1, Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); + %} ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift)))); ++instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MaxI src1 src2)); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(DEF dst, USE src1, USE src2); + -+// rol expander -+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@maxI_rReg" ++ %} + -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ Label Lsrc1, Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ + %} + + ins_pipe(ialu_reg_reg); +%} + -+// rol expander -+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++// ============================================================================ ++// Branch Instructions ++// Direct Branch. 
++instruct branch(label lbl) ++%{ ++ match(Goto); + -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(USE lbl); + -+instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift)))); ++ ins_cost(BRANCH_COST); ++ format %{ "j $lbl\t#@branch" %} + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} ++ ins_encode(riscv_enc_j(lbl)); ++ ++ ins_pipe(pipe_branch); +%} + -+instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift)))); ++// ============================================================================ ++// Compare and Branch Instructions ++ ++// Patterns for short (< 12KiB) variants + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} -+%} ++// Compare flags and branch near instructions. ++instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); + -+instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift)))); ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} + -+ expand %{ -+ rolL_reg_b(dst, src, shift); ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); + %} ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift)))); ++// Compare signed int and branch near instructions ++instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpI_branch'. ++ match(If cmp (CmpI op1 op2)); + -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(USE lbl); + -+// Convert oop into int for vectors alignment masking -+instruct convP2I_b(iRegINoSp dst, iRegP src) %{ -+ predicate(UseZba); -+ match(Set dst (ConvL2I (CastP2X src))); ++ ins_cost(BRANCH_COST); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// byte to int -+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpI_loop'. 
++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ ++ effect(USE lbl); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// int to short -+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++// Compare unsigned int and branch near instructions ++instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpU_branch'. ++ match(If cmp (CmpU op1 op2)); + -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// short to unsigned int -+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src mask)); ++instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpU_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 op2)); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseZba); -+ match(Set dst (AndL (ConvI2L src) mask)); ++// Compare signed long and branch near instructions ++instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_branch'. ++ match(If cmp (CmpL op1 op2)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// BSWAP instructions -+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesI src)); ++instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_loop'. 
++ match(CountedLoopEnd cmp (CmpL op1 op2)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} + + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesL src)); ++// Compare unsigned long and branch near instructions ++instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_branch'. ++ match(If cmp (CmpUL op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesUS src)); ++instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesS src)); ++// Compare pointer and branch near instructions ++instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_branch'. ++ match(If cmp (CmpP op1 op2)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// Shift Add Pointer -+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL src2 imm))); ++instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_loop'. 
++ match(CountedLoopEnd cmp (CmpP op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++// Compare narrow pointer and branch near instructions ++instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_branch'. ++ match(If cmp (CmpN op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// Shift Add Long -+instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL src2 imm))); ++instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++// Compare float and branch near instructions ++instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_branch'. 
++ match(If cmp (CmpF op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+// Zeros Count instructions -+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosI src)); ++instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_loop'. ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} + + ins_encode %{ -+ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosL src)); ++// Compare double and branch near instructions ++instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_branch'. ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} + + ins_encode %{ -+ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosI src)); ++instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_loop'. ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} + + ins_encode %{ -+ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosL src)); ++// Compare signed int with zero and branch near instructions ++instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpI_reg_imm0_branch'. 
++ match(If cmp (CmpI op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} + + ins_encode %{ -+ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Population Count instructions -+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountI src)); ++instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpI_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} + + ins_encode %{ -+ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Note: Long/bitCount(long) returns an int. -+instruct popCountL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountL src)); ++// Compare unsigned int with zero and branch near instructions ++instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpU op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} + + ins_encode %{ -+ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Max and Min -+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MinI src1 src2)); ++instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} + -+ ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + + ins_encode %{ -+ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MaxI src1 src2)); ++// Compare signed long with zero and branch near instructions ++instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpL_reg_imm0_branch'. 
++ match(If cmp (CmpL op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} + + ins_encode %{ -+ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Abs -+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsI src)); ++instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpL_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 zero)); + -+ ins_cost(ALU_COST * 2); -+ format %{ -+ "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_b" -+ %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} + + ins_encode %{ -+ __ negw(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsL src)); ++// Compare unsigned long with zero and branch near instructions ++instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST * 2); -+ format %{ -+ "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_b" -+ %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} + + ins_encode %{ -+ __ neg(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// And Not -+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src1 (XorI src2 m1))); ++instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} + + ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndL src1 (XorL src2 m1))); ++// Compare pointer with zero and branch near instructions ++instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_branch'. 
++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} + + ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Or Not -+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI src1 (XorI src2 m1))); ++instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} + + ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL src1 (XorL src2 m1))); ++// Compare narrow pointer with zero and branch near instructions ++instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_branch'. ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} + + ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -new file mode 100644 -index 000000000..905041890 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,1723 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// + -+// RISCV VEC Architecture Description File ++instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+opclass vmemA(indirect); ++ ins_cost(BRANCH_COST); + -+source_hpp %{ -+ bool op_vec_supported(int opcode); -+%} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} + -+source %{ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + -+ static inline BasicType vector_element_basic_type(const MachNode* n) { -+ const TypeVect* vt = n->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} + -+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { -+ int def_idx = use->operand_index(opnd); -+ Node* def = use->in(def_idx); -+ const TypeVect* vt = def->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++// Compare narrow pointer with pointer zero and branch near instructions ++instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+ static void loadStore(MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} + -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+definitions %{ -+ int_def VEC_COST (200, 200); -+%} ++instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+// All VEC instructions ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} + -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} + ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(MacroAssembler(&cbuf), false, dst_reg, -+ vector_element_basic_type(this), as_Register($mem$$base)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} ++// Patterns for far (20KiB) variants ++ ++instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ + ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(MacroAssembler(&cbuf), true, src_reg, -+ vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector abs ++// Compare signed int and branch far instructions ++instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpI op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ // the format instruction [far_b$cmp] here is be used as two insructions ++ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} + -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP 
tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} ++instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} ++instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// vector add ++instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, 
$src2\t#@vaddB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} ++instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ match(If cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} ++instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} ++instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ match(If cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ 
true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} ++instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// vector and ++// Float compare and branch instructions ++instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ match(If cmp (CmpF op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+// vector or ++instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+// vector xor ++// Double compare and branch instructions ++instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+// vector float div ++instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); 
++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fmla ++instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} ++instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fmls ++instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++ + -+// 
dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fnmla ++instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ 
ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} ++instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fnmls ++instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ 
enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector mla ++instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} ++instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} ++instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} ++instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector mls ++instruct 
far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} ++// ============================================================================ ++// Conditional Move Instructions ++instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} ++instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} ++instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), 
as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+// vector mul ++instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} ++instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ format %{ ++ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++ ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. 
++instruct CallStaticJavaDirect(method meth) ++%{ ++ match(CallStaticJava); ++ ++ effect(USE meth); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_static_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// TO HERE ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallDynamicJava); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ++ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_dynamic_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// Call Runtime Instruction ++ ++instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallRuntime); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+// vector fneg ++// Call Runtime Instruction + -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++instruct CallLeafDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeaf); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// Call Runtime Instruction ++ ++instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeafNoFP); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+// popcount vector ++// ============================================================================ ++// Partial Subtype Check ++// ++// superklass array for an instance of the superklass. Set a hidden ++// internal cache on a hit (cache is checked with exposed code in ++// gen_subtype_check()). 
Return zero for a hit. The encoding ++// ALSO sets flags. + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) ++%{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp, KILL cr); ++ ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); ++ ++ opcode(0x1); // Force zero of result reg on hit ++ ++ ins_pipe(pipe_class_memory); +%} + -+// vector add reduction ++instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, ++ immP0 zero, rFlagsReg cr) ++%{ ++ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); ++ effect(KILL tmp, KILL result); + -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} ++ ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); ++ ++ opcode(0x0); // Don't zero result reg on hit ++ ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ 
iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, ++ rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector replicate ++instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, 
$cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} ++instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} ++instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} ++instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ 
string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} ++instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} ++ ++instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, true /* isL */); + %} -+ ins_pipe(pipe_slow); ++ 
ins_pipe(pipe_class_memory); +%} + -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} ++// clearing of an array ++instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) ++%{ ++ predicate(!UseRVV); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ address tpc = __ zero_words($base$$Register, $cnt$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} ++instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() ++ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base, KILL cr); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} ++instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++ ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector shift ++instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} ++instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ 
match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} ++// ============================================================================ ++// Safepoint Instructions ++ ++instruct safePoint(iRegP poll) ++%{ ++ match(SafePoint poll); ++ ++ ins_cost(2 * LOAD_COST); ++ format %{ ++ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" ++ %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); +%} + -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++// ============================================================================ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. ++instruct tlsLoadP(javaThread_RegP dst) ++%{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ ++ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ ++ size(0); ++ ++ ins_encode( /*empty*/ ); ++ ++ ins_pipe(pipe_class_empty); ++%} ++ ++// inlined locking and unlocking ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp1, TEMP tmp2); ++ ++ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ++ format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %} ++ ++ ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp1, TEMP tmp2); ++ ++ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ++ format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} ++ ++ ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. 
++instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++%{ ++ match(TailCall jump_target method_oop); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} ++ ++ ins_encode(riscv_enc_tail_call(jump_target)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++%{ ++ match(TailJump jump_target ex_oop); ++ ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ ++ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException(iRegP_R10 ex_oop) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ ins_cost(0); ++ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ ++ size(0); ++ ++ ins_encode( /*empty*/ ); ++ ++ ins_pipe(pipe_class_empty); ++%} ++ ++// Rethrow exception: The exception oop will come in the first ++// argument position. Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ++ ins_encode(riscv_enc_rethrow()); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Return Instruction ++// epilog node loads ret address into ra as part of frame pop ++instruct Ret() ++%{ ++ match(Return); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "ret\t// return register, #@Ret" %} ++ ++ ins_encode(riscv_enc_ret()); ++ ++ ins_pipe(pipe_branch); ++%} ++ ++// Die now. ++instruct ShouldNotReachHere() %{ ++ match(Halt); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "#@ShouldNotReachHere" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ Assembler::CompressibleRegion cr(&_masm); ++ if (is_reachable()) { ++ __ halt(); ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] 
); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == RAX_enc) ++// Only one replacement instruction ++// ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ ++// Local Variables: ++// mode: c++ ++// End: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +new file mode 100644 +index 00000000000..4488c1c4031 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -0,0 +1,527 @@ ++// ++// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// RISCV Bit-Manipulation Extension Architecture Description File ++ ++instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} ++instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlsrS(vReg dst, vReg src, 
vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateLeft src shift)); ++ ++ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + ++instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateLeft src shift)); + -+instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} ++ format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + ++// Convert oop into int for vectors alignment masking ++instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ ++ predicate(UseRVB); ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} ++// byte to int ++instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseRVB); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrS_imm(vReg dst, vReg src, immI 
shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} ++// int to short ++instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseRVB); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} ++// short to unsigned int ++instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseRVB); ++ match(Set dst (AndI src mask)); ++ ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} ++// int to unsigned long (zero extend) ++instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseRVB); ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} ++// BSWAP instructions ++instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesI src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, 
Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} ++instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} ++instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesUS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} ++instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + 
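
For reference, a minimal Java sketch — illustrative only, not part of the patch; the class and method names are invented — of the source-level idioms whose C2 ideal-graph shapes the Zbb (`UseRVB`) match rules in this riscv_b.ad hunk are written for. Whether these forms are actually emitted also depends on C2 inlining/intrinsics and on flags such as UseRVB and UsePopCountInstruction.

class ZbbIdioms {                                       // illustrative only
    // (RShiftI (LShiftI x 24) 24)   -> convB2I_reg_reg_rvb  -> sext.b
    static int signExtendByte(int x)  { return (byte) x; }
    // (RShiftI (LShiftI x 16) 16)   -> convI2S_reg_reg_rvb  -> sext.h
    static int signExtendShort(int x) { return (short) x; }
    // (AndI x 0xFFFF)               -> convS2UI_reg_reg_rvb -> zext.h
    static int zeroExtendChar(int x)  { return x & 0xFFFF; }
    // (AndL (ConvI2L x) 0xFFFFFFFF) -> convI2UL_reg_reg_rvb -> zext.w
    static long zeroExtendInt(int x)  { return x & 0xFFFFFFFFL; }
    // ReverseBytesI / ReverseBytesL -> revb_w_w / rev8
    static int  bswap32(int x)  { return Integer.reverseBytes(x); }
    static long bswap64(long x) { return Long.reverseBytes(x); }
    // RotateRight (when the rotate ideal node is supported) -> roriw / rorw
    static int rotate(int x, int s) { return Integer.rotateRight(x, s); }
    // PopCountI / PopCountL (returns int) -> cpopw / cpop
    static int bits(long x) { return Long.bitCount(x); }
    // CountLeadingZerosI / CountTrailingZerosI -> clzw / ctzw
    static int clz(int x) { return Integer.numberOfLeadingZeros(x); }
    static int ctz(int x) { return Integer.numberOfTrailingZeros(x); }
    // MinI / MaxI -> min / max ; AbsI -> negw + max
    static int clampNonNegative(int x) { return Math.max(x, 0); }
    static int magnitude(int x)        { return Math.abs(x); }
    // (AndI a (XorI b -1)), i.e. a & ~b -> andnI_reg_reg_rvb -> andn
    static int andNot(int a, int b) { return a & ~b; }
}
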
-+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} ++// Shift Add Pointer ++instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddP src1 (LShiftL src2 imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} ++instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} ++// Shift Add Long ++instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddL src1 (LShiftL src2 imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} ++instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con < 32) { -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } 
else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} ++// Zeros Count instructions ++instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} ++instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} ++instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} ++instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sqrt ++// Population Count instructions ++instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpopw 
$dst, $src\t#@popCountI_rvb" %} + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} ++// Note: Long/bitCount(long) returns an int. ++instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpop $dst, $src\t#@popCountL_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sub ++// Max and Min ++instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseRVB); ++ match(Set dst (MinI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} ++instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseRVB); ++ match(Set dst (MaxI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++// Abs ++instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ ++ predicate(UseRVB); ++ match(Set dst (AbsI src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "negw t0, $src\n\t" ++ "max $dst, $src, t0\t#@absI_reg_rvb" + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ negw(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_slow); ++ ++ 
ins_pipe(ialu_reg_reg); +%} + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (AbsL src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "neg t0, $src\n\t" ++ "max $dst, $src, t0\t#@absL_reg_rvb" + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ neg(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++// And Not ++instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (AndI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (AndL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); -+%} -+ -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++// Or Not ++instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (OrI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (OrL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_pipe(ialu_reg_reg); ++%} +\ No newline at end of file +diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +new file mode 100644 +index 00000000000..3828e096b21 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_v.ad +@@ -0,0 +1,2065 @@ ++// ++// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, Arm Limited. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// RISCV Vector Extension Architecture Description File ++ ++opclass vmemA(indirect); ++ ++source_hpp %{ ++ bool op_vec_supported(int opcode); ++%} ++ ++source %{ ++ ++ static void loadStore(C2_MacroAssembler masm, bool is_store, ++ VectorRegister reg, BasicType bt, Register base) { ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ masm.vsetvli(t0, x0, sew); ++ if (is_store) { ++ masm.vsex_v(reg, base, sew); ++ } else { ++ masm.vlex_v(reg, base, sew); ++ } ++ } ++ ++ bool op_vec_supported(int opcode) { ++ switch (opcode) { ++ // No multiply reduction instructions ++ case Op_MulReductionVD: ++ case Op_MulReductionVF: ++ case Op_MulReductionVI: ++ case Op_MulReductionVL: ++ // Others ++ case Op_Extract: ++ case Op_ExtractB: ++ case Op_ExtractC: ++ case Op_ExtractD: ++ case Op_ExtractF: ++ case Op_ExtractI: ++ case Op_ExtractL: ++ case Op_ExtractS: ++ case Op_ExtractUB: ++ // Vector API specific ++ case Op_AndReductionV: ++ case Op_OrReductionV: ++ case Op_XorReductionV: ++ case Op_LoadVectorGather: ++ case Op_StoreVectorScatter: ++ case Op_VectorBlend: ++ case Op_VectorCast: ++ case Op_VectorCastB2X: ++ case Op_VectorCastD2X: ++ case Op_VectorCastF2X: ++ case Op_VectorCastI2X: ++ case Op_VectorCastL2X: ++ case Op_VectorCastS2X: ++ case Op_VectorInsert: ++ case Op_VectorLoadConst: ++ case Op_VectorLoadMask: ++ case Op_VectorLoadShuffle: ++ case Op_VectorMaskCmp: ++ case Op_VectorRearrange: ++ case Op_VectorReinterpret: ++ case Op_VectorStoreMask: ++ case Op_VectorTest: ++ return false; ++ default: ++ return UseRVV; ++ } ++ } ++ ++%} ++ ++definitions %{ ++ int_def VEC_COST (200, 200); ++%} ++ ++// All VEC instructions ++ ++// vector load/store ++instruct loadV(vReg dst, vmemA mem) %{ ++ match(Set dst (LoadVector mem)); ++ ins_cost(VEC_COST); ++ format %{ "vle $dst, $mem\t#@loadV" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); ++ VectorRegister dst_reg = as_VectorRegister($dst$$reg); ++ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, ++ Matcher::vector_element_basic_type(this), as_Register($mem$$base)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++instruct storeV(vReg src, vmemA mem) %{ ++ match(Set mem (StoreVector mem src)); ++ ins_cost(VEC_COST); ++ format %{ "vse $src, $mem\t#@storeV" %} ++ ins_encode %{ ++ VectorRegister src_reg = as_VectorRegister($src$$reg); ++ loadStore(C2_MacroAssembler(&cbuf), true, src_reg, ++ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++// vector abs ++ ++instruct vabsB(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVB src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" ++ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ 
string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ++instruct vabsS(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVS src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" ++ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++instruct vabsI(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVI src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" ++ "vmax.vv $dst, $tmp, $src" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{ "String Inflate $src,$dst" %} ++instruct vabsL(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVL src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" ++ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ 
predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++instruct vabsF(vReg dst, vReg src) %{ ++ match(Set dst (AbsVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{ "Encode array $src,$dst,$len -> $result" %} ++instruct vabsD(vReg dst, vReg src) %{ ++ match(Set dst (AbsVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} + ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe( pipe_class_memory ); ++ ins_pipe(pipe_slow); +%} + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++// vector add + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} ++instruct vaddB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} + ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); ++ ins_pipe(pipe_slow); +%} + -+instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (HasNegatives ary1 len)); -+ effect(USE_KILL ary1, USE_KILL len, TEMP tmp); -+ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++instruct vaddS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} + ins_encode %{ -+ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); ++ ins_pipe(pipe_slow); +%} + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++instruct vaddI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ 
as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++instruct vaddL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + ++instruct vaddF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} + ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); + %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_pipe(pipe_class_memory); ++instruct vaddD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); +%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -new file mode 100644 -index 000000000..9922ff4cf ---- /dev/null -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2738 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/debugInfoRec.hpp" -+#include "code/icBuffer.hpp" -+#include "code/vtableStubs.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/align.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+#ifdef COMPILER2 -+#include "adfiles/ad_riscv.hpp" -+#include "opto/runtime.hpp" -+#endif ++// vector and + -+#define __ masm-> ++instruct vand(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AndV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vand_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++// vector or + -+class SimpleRuntimeFrame { -+public: ++instruct vor(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (OrV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vor_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Most of the runtime stubs have this simple frame layout. -+ // This class exists to make the layout shared in one place. -+ // Offsets are for compiler stack slots, which are jints. -+ enum layout { -+ // The frame sender code expects that fp will be in the "natural" place and -+ // will override any oopMap setting for it. We must therefore force the layout -+ // so that it agrees with the frame sender code. -+ // we don't expect any arg reg save area so riscv asserts that -+ // frame::arg_reg_save_area_bytes == 0 -+ fp_off = 0, fp_off2, -+ return_off, return_off2, -+ framesize -+ }; -+}; ++// vector xor + -+class RegisterSaver { -+ const bool _save_vectors; -+ public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ ~RegisterSaver() {} -+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); -+ void restore_live_registers(MacroAssembler* masm); ++instruct vxor(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (XorV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vxor_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Offsets into the register save area -+ // Used by deoptimization when it is managing result register -+ // values on its own -+ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. --| -+ // |---v31--|----- -+ // |---f0---| -+ // |---f1---| -+ // | .. | -+ // |---f31--| -+ // |---reserved slot for stack alignment---| -+ // |---x5---| -+ // | x6 | -+ // |---.. 
--| -+ // |---x31--| -+ // |---fp---| -+ // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } -+ int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; -+ } -+ int reserved_slot_offset_in_bytes(void) { -+ return f0_offset_in_bytes() + -+ FloatRegisterImpl::max_slots_per_register * -+ FloatRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } ++// vector float div + -+ int reg_offset_in_bytes(Register r) { -+ assert(r->encoding() > 4, "ra, sp, gp and tp not saved"); -+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; -+ } ++instruct vdivF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (DivVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfdiv_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int freg_offset_in_bytes(FloatRegister f) { -+ return f0_offset_in_bytes() + f->encoding() * wordSize; -+ } ++instruct vdivD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (DivVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfdiv_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int ra_offset_in_bytes(void) { -+ return reserved_slot_offset_in_bytes() + -+ (RegisterImpl::number_of_registers - 3) * -+ RegisterImpl::max_slots_per_register * -+ BytesPerInt; -+ } ++// vector integer max/min + -+ // During deoptimization only the result registers need to be restored, -+ // all the other values have already been extracted. -+ void restore_result_registers(MacroAssembler* masm); -+}; ++instruct vmax(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && ++ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} ++ ins_encode %{ ++ BasicType bt = Matcher::vector_element_basic_type(this); ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ __ vsetvli(t0, x0, sew); ++ __ vmax_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif -+ -+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); -+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words -+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; -+ // The caller will allocate additional_frame_words -+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; -+ // CodeBlob frame size is in words. 
-+ int frame_size_in_words = frame_size_in_bytes / wordSize; -+ *total_frame_words = frame_size_in_words; -+ -+ // Save Integer, Float and Vector registers. -+ __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++instruct vmin(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && ++ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} ++ ins_encode %{ ++ BasicType bt = Matcher::vector_element_basic_type(this); ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ __ vsetvli(t0, x0, sew); ++ __ vmin_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++// vector float-point max/min + -+ OopMapSet *oop_maps = new OopMapSet(); -+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); -+ assert_cond(oop_maps != NULL && oop_map != NULL); ++instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int sp_offset_in_slots = 0; -+ int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } -+ } ++instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ step_in_slots = FloatRegisterImpl::max_slots_per_register; -+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ FloatRegister r = as_FloatRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } ++instruct vminF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vminF $dst, $src1, $src2\t#@vminF" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ step_in_slots = RegisterImpl::max_slots_per_register; -+ // skip the slot reserved for alignment, see MacroAssembler::push_reg; -+ // also skip x5 ~ 
x6 on the stack because they are caller-saved registers. -+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; -+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. -+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ Register r = as_Register(i); -+ if (r != xthread) { -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); -+ } -+ } ++instruct vminD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vminD $dst, $src1, $src2\t#@vminD" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ return oop_map; -+} ++// vector fmla + -+void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif -+ __ leave(); -+} ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+void RegisterSaver::restore_result_registers(MacroAssembler* masm) { -+ // Just restore result register. Only used by deoptimization. By -+ // now any callee save register that needs to be restored to a c2 -+ // caller of the deoptee has been extracted into the vframeArray -+ // and will be stuffed into the c2i adapter we create for later -+ // restoration so only result registers need to be restored here. -+ // Restore fp result register -+ __ fld(f10, Address(sp, freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_offset_in_bytes(x10))); ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Pop all of the register save are off the stack -+ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16)); -+} ++// vector fmls + -+// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. 
-+bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; -+} ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+size_t SharedRuntime::trampoline_size() { -+ // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instrction -+ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr -+} ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli -+ __ jalr(x0, t0, offset); -+} ++// vector fnmla + -+// --------------------------------------------------------------------------- -+// Read the array of BasicTypes from a signature, and compute where the -+// arguments should go. Values in the VMRegPair regs array refer to 4-byte -+// quantities. Values less than VMRegImpl::stack0 are registers, those above -+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -+// as framesizes are fixed. -+// VMRegImpl::stack0 refers to the first slot 0(sp). -+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register -+// up to RegisterImpl::number_of_registers) are the 64-bit -+// integer registers. ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// Note: the INPUTS in sig_bt are in units of Java argument words, -+// which are 64-bit. The OUTPUTS are in 32-bit units. 
++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// The Java calling convention is a "shifted" version of the C ABI. -+// By skipping the first C ABI register we can call non-static jni -+// methods with small numbers of arguments without having to shuffle -+// the arguments at all. Since we control the java ABI we ought to at -+// least get some advantage out of it. ++// vector fnmls + -+int SharedRuntime::java_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ int total_args_passed, -+ int is_outgoing) { -+ assert_cond(sig_bt != NULL && regs != NULL); -+ // Create the mapping between argument positions and -+ // registers. -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -+ j_rarg0, j_rarg1, j_rarg2, j_rarg3, -+ j_rarg4, j_rarg5, j_rarg6, j_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { -+ j_farg0, j_farg1, j_farg2, j_farg3, -+ j_farg4, j_farg5, j_farg6, j_farg7 -+ }; ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: -+ // halves of T_LONG or T_DOUBLE -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ 
regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++// vector mla + -+ return align_up(stk_args, 2); -+} ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// Patch the callers callsite with entry to compiled code if it exists. -+static void patch_callers_callsite(MacroAssembler *masm) { -+ Label L; -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, L); ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ enter(); -+ __ push_CPU_state(); ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // VM needs caller's callsite -+ // VM needs target method -+ // This needs to be a long call since we will relocate this adapter to -+ // the codeBuffer and it may not reach ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif ++// vector mls + -+ __ mv(c_rarg0, xmethod); -+ __ mv(c_rarg1, ra); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); -+ __ jalr(x1, t0, offset); -+ __ pop_CPU_state(); -+ // restore sp -+ __ leave(); -+ __ bind(L); -+} ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ 
"vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void gen_c2i_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ Label& skip_fixup) { -+ // Before we get into the guts of the C2I adapter, see if we should be here -+ // at all. We've come from compiled code and are attempting to jump to the -+ // interpreter, which means the caller made a static call to get here -+ // (vcalls always get a compiled target if there is one). Check for a -+ // compiled target. If there is one, we need to patch the caller's call. -+ patch_callers_callsite(masm); ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(skip_fixup); ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int words_pushed = 0; ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Since all args are passed on the stack, total_args_passed * -+ // Interpreter::stackElementSize is the space we need. 
++// vector mul + -+ int extraspace = total_args_passed * Interpreter::stackElementSize; ++instruct vmulB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ mv(x30, sp); ++instruct vmulS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // stack is aligned, keep it that way -+ extraspace = align_up(extraspace, 2 * wordSize); ++instruct vmulI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (extraspace) { -+ __ sub(sp, sp, extraspace); -+ } ++instruct vmulL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now write the args into the outgoing interpreter space -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++instruct vmulF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // offset to start parameters -+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ int next_off = st_off - Interpreter::stackElementSize; -+ -+ // Say 4 args: -+ // i st_off -+ // 0 32 T_LONG -+ // 1 24 T_VOID -+ // 2 16 T_OBJECT -+ // 3 8 T_BOOL -+ // - 0 return address -+ // -+ // However to make thing extra confusing. Because we can fit a Java long/double in -+ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter -+ // leaves one slot empty and only stores to a single slot. In this case the -+ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
++instruct vmulD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // memory to memory use t0 -+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size -+ + extraspace -+ + words_pushed * wordSize); -+ if (!r_2->is_valid()) { -+ __ lwu(t0, Address(sp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } else { -+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); ++// vector fneg + -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // ld_off == LSW, ld_off+wordSize == MSW -+ // st_off == MSW, next_off == LSW -+ __ sd(t0, Address(sp, next_off), /*temp register*/esp); -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaaaul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ } else { -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } -+ } -+ } else if (r_1->is_Register()) { -+ Register r = r_1->as_Register(); -+ if (!r_2->is_valid()) { -+ // must be only an int (or less ) so move only 32bits to slot -+ __ sd(r, Address(sp, st_off)); -+ } else { -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // jlong/double in gpr -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaabul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ sd(r, Address(sp, next_off)); -+ } else { -+ __ sd(r, Address(sp, st_off)); -+ } -+ } -+ } else { -+ assert(r_1->is_FloatRegister(), ""); -+ if (!r_2->is_valid()) { -+ // only a float use just part of the slot -+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); -+ } else { -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaacul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); -+ } -+ } -+ } ++instruct vnegF(vReg dst, vReg src) %{ ++ match(Set dst (NegVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ mv(esp, sp); // Interp expects args on caller's expression stack ++instruct vnegD(vReg dst, vReg src) %{ ++ match(Set dst (NegVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); -+ __ jr(t0); -+} ++// popcount vector + -+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ 
const BasicType *sig_bt, -+ const VMRegPair *regs) { -+ // Cut-out for having no stack args. -+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; -+ if (comp_args_on_stack != 0) { -+ __ sub(t0, sp, comp_words_on_stack * wordSize); -+ __ andi(sp, t0, -16); -+ } ++instruct vpopcountI(iRegINoSp dst, vReg src) %{ ++ match(Set dst (PopCountVI src)); ++ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Will jump to the compiled code just as if compiled code was doing it. -+ // Pre-load the register-jump target early, to schedule it better. -+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); ++// vector add reduction + -+ // Now generate the shuffle code. -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Pick up 0, 1 or 2 words from SP+offset. ++instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), -+ "scrambled load targets?"); -+ // Load in argument order going down. -+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ // Point to interpreter value (vs. 
tag) -+ int next_off = ld_off - Interpreter::stackElementSize; ++instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // Convert stack slot to an SP offset (+ wordSize to account for return address ) -+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; -+ if (!r_2->is_valid()) { -+ __ lw(t0, Address(esp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } else { -+ // -+ // We are using two optoregs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. -+ // -+ // Interpreter local[n] == MSW, local[n+1] == LSW however locals -+ // are accessed as negative so LSW is at LOW address ++instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // ld_off is MSW so get LSW -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; -+ __ ld(t0, Address(esp, offset)); -+ // st_off is LSW (i.e. reg.first()) -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } -+ } else if (r_1->is_Register()) { // Register argument -+ Register r = r_1->as_Register(); -+ if (r_2->is_valid()) { -+ // -+ // We are using two VMRegs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. 
++instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ ++ match(Set src1_dst (AddReductionVF src1_dst src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" ++ "vfredosum.vs $tmp, $src2, $tmp\n\t" ++ "vfmv.f.s $src1_dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); ++ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; ++instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ ++ match(Set src1_dst (AddReductionVD src1_dst src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" ++ "vfredosum.vs $tmp, $src2, $tmp\n\t" ++ "vfmv.f.s $src1_dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); ++ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // this can be a misaligned move -+ __ ld(r, Address(esp, offset)); -+ } else { -+ // sign extend and use a full word? -+ __ lw(r, Address(esp, ld_off)); -+ } -+ } else { -+ if (!r_2->is_valid()) { -+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); -+ } else { -+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); -+ } -+ } -+ } ++// vector integer max reduction ++instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // 6243940 We might end up in handle_wrong_method if -+ // the callee is deoptimized as we race thru here. If that -+ // happens we don't want to take a safepoint because the -+ // caller frame will look interpreted and arguments are now -+ // "compiled" so it is much better to make this transition -+ // invisible to the stack walking code. Unfortunately if -+ // we try and find the callee by normal means a safepoint -+ // is possible. So we stash the desired callee in the thread -+ // and the vm will find there should this case occur. 
++instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); ++instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ jr(t1); -+} ++instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// --------------------------------------------------------------- -+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ AdapterFingerPrint* fingerprint) { -+ address i2c_entry = __ pc(); -+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++// vector integer min reduction ++instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ address c2i_unverified_entry = __ pc(); -+ Label skip_fixup; ++instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP 
tmp); ++ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ Label ok; ++instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ const Register holder = t1; -+ const Register receiver = j_rarg0; -+ const Register tmp = t2; // A call-clobbered register not used for arg passing ++instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // ------------------------------------------------------------------------- -+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls -+ // to the interpreter. The args start out packed in the compiled layout. They -+ // need to be unpacked into the interpreter layout. This will almost always -+ // require some stack space. We grow the current (compiled) stack, then repack -+ // the args. We finally end in a jump to the generic interpreter entry point. -+ // On exit from the interpreter, the interpreter will restore our SP (lest the -+ // compiled code, which relys solely on SP and not FP, get sick). 
++// vector float max reduction + -+ { -+ __ block_comment("c2i_unverified_entry {"); -+ __ load_klass(t0, receiver); -+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); -+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); -+ __ beq(t0, tmp, ok); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ false /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(ok); -+ // Method might have been compiled since the call site was patched to -+ // interpreted; if that is the case treat it as a miss so we can get -+ // the call site corrected. -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, skip_fixup); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ block_comment("} c2i_unverified_entry"); -+ } ++instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ true /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ address c2i_entry = __ pc(); ++// vector float min reduction + -+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ false /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ flush(); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); -+} ++instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+int SharedRuntime::c_calling_convention(const BasicType 
*sig_bt, -+ VMRegPair *regs, -+ VMRegPair *regs2, -+ int total_args_passed) { -+ assert(regs2 == NULL, "not needed on riscv"); -+ assert_cond(sig_bt != NULL && regs != NULL); ++// vector Math.rint, floor, ceil + -+ // We return the amount of VMRegImpl stack slots we need to reserve for all -+ // the arguments NOT counting out_preserve_stack_slots. ++instruct vroundD(vReg dst, vReg src, immI rmode) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vroundD $dst, $src, $rmode" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rne); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ case RoundDoubleModeNode::rmode_floor: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ case RoundDoubleModeNode::rmode_ceil: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rup); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, -+ c_rarg4, c_rarg5, c_rarg6, c_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { -+ c_farg0, c_farg1, c_farg2, c_farg3, -+ c_farg4, c_farg5, c_farg6, c_farg7 -+ }; ++// vector replicate + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++instruct replicateB(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateB src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: // fall through -+ case T_METADATA: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ 
regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: // Halves of longs and doubles -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } -+ -+ return stk_args; -+} -+ -+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ fsw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fsd(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ sd(x10, Address(fp, -3 * wordSize)); -+ } -+ } -+} -+ -+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ flw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fld(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ ld(x10, Address(fp, -3 * wordSize)); -+ } -+ } -+} ++instruct replicateS(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateS src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ addi(sp, sp, -2 * wordSize); -+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ } -+ } -+ __ push_reg(x, sp); -+} ++instruct replicateI(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateI src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else { -+ ; -+ } -+ } -+ __ pop_reg(x, sp); -+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { -+ if (args[i].first()->is_Register()) { -+ ; -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ __ add(sp, sp, 2 * wordSize); -+ } -+ } -+} ++instruct replicateL(vReg dst, iRegL src) %{ ++ match(Set dst (ReplicateL src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// Check GCLocker::needs_gc and enter the runtime if it's true. This -+// keeps a new JNI critical region from starting until a GC has been -+// forced. 
Save down any oops in registers and describe them in an -+// OopMap. -+static void check_needs_gc_for_critical_native(MacroAssembler* masm, -+ int stack_slots, -+ int total_c_args, -+ int total_in_args, -+ int arg_save_area, -+ OopMapSet* oop_maps, -+ VMRegPair* in_regs, -+ BasicType* in_sig_bt) { Unimplemented(); } -+ -+// Unpack an array argument into a pointer to the body and the length -+// if the array is non-null, otherwise pass 0 for both. -+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } -+ -+class ComputeMoveOrder: public StackObj { -+ class MoveOperation: public ResourceObj { -+ friend class ComputeMoveOrder; -+ private: -+ VMRegPair _src; -+ VMRegPair _dst; -+ int _src_index; -+ int _dst_index; -+ bool _processed; -+ MoveOperation* _next; -+ MoveOperation* _prev; -+ -+ static int get_id(VMRegPair r) { Unimplemented(); return 0; } -+ -+ public: -+ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): -+ _src(src) -+ , _dst(dst) -+ , _src_index(src_index) -+ , _dst_index(dst_index) -+ , _processed(false) -+ , _next(NULL) -+ , _prev(NULL) { Unimplemented(); } -+ -+ ~MoveOperation() { -+ _next = NULL; -+ _prev = NULL; -+ } -+ -+ VMRegPair src() const { Unimplemented(); return _src; } -+ int src_id() const { Unimplemented(); return 0; } -+ int src_index() const { Unimplemented(); return 0; } -+ VMRegPair dst() const { Unimplemented(); return _src; } -+ void set_dst(int i, VMRegPair dst) { Unimplemented(); } -+ int dst_index() const { Unimplemented(); return 0; } -+ int dst_id() const { Unimplemented(); return 0; } -+ MoveOperation* next() const { Unimplemented(); return 0; } -+ MoveOperation* prev() const { Unimplemented(); return 0; } -+ void set_processed() { Unimplemented(); } -+ bool is_processed() const { Unimplemented(); return 0; } -+ -+ // insert -+ void break_cycle(VMRegPair temp_register) { Unimplemented(); } -+ -+ void link(GrowableArray& killer) { Unimplemented(); } -+ }; ++instruct replicateB_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateB con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ private: -+ GrowableArray edges; ++instruct replicateS_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateS con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ public: -+ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, -+ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } ++instruct replicateI_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateI con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ ~ComputeMoveOrder() {} -+ // Collected all the move operations -+ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } ++instruct replicateL_imm5(vReg dst, immL5 con) %{ ++ match(Set dst (ReplicateL con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, 
$con\t#@replicateL_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Walk the edges breaking cycles between moves. The result list -+ // can be walked in order to produce the proper set of loads -+ GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -+}; ++instruct replicateF(vReg dst, fRegF src) %{ ++ match(Set dst (ReplicateF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void verify_oop_args(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ const Register temp_reg = x9; // not part of any compiled calling seq -+ if (VerifyOops) { -+ for (int i = 0; i < method->size_of_parameters(); i++) { -+ if (sig_bt[i] == T_OBJECT || -+ sig_bt[i] == T_ARRAY) { -+ VMReg r = regs[i].first(); -+ assert(r->is_valid(), "bad oop arg"); -+ if (r->is_stack()) { -+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ __ verify_oop(temp_reg); -+ } else { -+ __ verify_oop(r->as_Register()); -+ } -+ } -+ } -+ } -+} ++instruct replicateD(vReg dst, fRegD src) %{ ++ match(Set dst (ReplicateD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void gen_special_dispatch(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ verify_oop_args(masm, method, sig_bt, regs); -+ vmIntrinsics::ID iid = method->intrinsic_id(); ++// vector shift + -+ // Now write the args into the outgoing interpreter space -+ bool has_receiver = false; -+ Register receiver_reg = noreg; -+ int member_arg_pos = -1; -+ Register member_reg = noreg; -+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); -+ if (ref_kind != 0) { -+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -+ member_reg = x9; // known to be free at this point -+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -+ } else if (iid == vmIntrinsics::_invokeBasic) { -+ has_receiver = true; -+ } else { -+ fatal("unexpected intrinsic id %d", iid); -+ } ++instruct vasrB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" ++ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ BitsPerByte - 1, Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (member_reg != noreg) { -+ // Load the member_arg into register, if necessary. 
-+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); -+ VMReg r = regs[member_arg_pos].first(); -+ if (r->is_stack()) { -+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ member_reg = r->as_Register(); -+ } -+ } ++instruct vasrS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" ++ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ BitsPerShort - 1, Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (has_receiver) { -+ // Make sure the receiver is loaded into a register. -+ assert(method->size_of_parameters() > 0, "oob"); -+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); -+ VMReg r = regs[0].first(); -+ assert(r->is_valid(), "bad receiver arg"); -+ if (r->is_stack()) { -+ // Porting note: This assumes that compiled calling conventions always -+ // pass the receiver oop in a register. If this is not true on some -+ // platform, pick a temp and load the receiver from stack. -+ fatal("receiver always in a register"); -+ receiver_reg = x12; // known to be free at this point -+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ receiver_reg = r->as_Register(); -+ } -+ } ++instruct vasrI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Figure out which address we are really jumping to: -+ MethodHandles::generate_method_handle_dispatch(masm, iid, -+ receiver_reg, member_reg, /*for_compiler_entry:*/ true); -+} ++instruct vasrL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// --------------------------------------------------------------------------- -+// Generate a native wrapper for a given method. The method takes arguments -+// in the Java compiled code convention, marshals them to the native -+// convention (handlizes oops, etc), transitions to native, makes the call, -+// returns to java state (possibly blocking), unhandlizes any result and -+// returns. -+// -+// Critical native functions are a shorthand for the use of -+// GetPrimtiveArrayCritical and disallow the use of any other JNI -+// functions. 
The wrapper is expected to unpack the arguments before -+// passing them to the callee and perform checks before and after the -+// native call to ensure that they GCLocker -+// lock_critical/unlock_critical semantics are followed. Some other -+// parts of JNI setup are skipped like the tear down of the JNI handle -+// block and the check for pending exceptions it's impossible for them -+// to be thrown. -+// -+// They are roughly structured like this: -+// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() -+// tranistion to thread_in_native -+// unpack arrray arguments and call native entry point -+// check for safepoint in progress -+// check if any thread suspend flags are set -+// call into JVM and possible unlock the JNI critical -+// if a GC was suppressed while in the critical native. -+// transition back to thread_in_Java -+// return to caller -+// -+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, -+ const methodHandle& method, -+ int compile_id, -+ BasicType* in_sig_bt, -+ VMRegPair* in_regs, -+ BasicType ret_type, -+ address critical_entry) { -+ if (method->is_method_handle_intrinsic()) { -+ vmIntrinsics::ID iid = method->intrinsic_id(); -+ intptr_t start = (intptr_t)__ pc(); -+ int vep_offset = ((intptr_t)__ pc()) - start; ++instruct vlslB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect( TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // First instruction must be a nop as it may need to be patched on deoptimisation -+ __ nop(); -+ gen_special_dispatch(masm, -+ method, -+ in_sig_bt, -+ in_regs); -+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period -+ __ flush(); -+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually -+ return nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ in_ByteSize(-1), -+ in_ByteSize(-1), -+ (OopMapSet*)NULL); -+ } -+ bool is_critical_native = true; -+ address native_func = critical_entry; -+ if (native_func == NULL) { -+ native_func = method->native_function(); -+ is_critical_native = false; -+ } -+ assert(native_func != NULL, "must have function"); ++instruct vlslS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), 
Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // An OopMap for lock (and class if static) -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ intptr_t start = (intptr_t)__ pc(); ++instruct vlslI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // We have received a description of where all the java arg are located -+ // on entry to the wrapper. We need to convert these args to where -+ // the jni function will expect them. To figure out where they go -+ // we convert the java signature to a C signature by inserting -+ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++instruct vlslL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args; -+ if (!is_critical_native) { -+ total_c_args += 1; -+ if (method->is_static()) { -+ total_c_args++; -+ } -+ } else { -+ for (int i = 0; i < total_in_args; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ total_c_args++; -+ } -+ } -+ } ++instruct vlsrB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); -+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ assert_cond(out_sig_bt != NULL && out_regs != NULL); -+ BasicType* in_elem_bt = NULL; -+ -+ int argc = 0; -+ if (!is_critical_native) { -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++instruct vlsrS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ 
vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } -+ } else { -+ Thread* THREAD = Thread::current(); -+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); -+ assert_cond(in_elem_bt != NULL); -+ SignatureStream ss(method->signature()); -+ for (int i = 0; i < total_in_args ; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ // Arrays are passed as int, elem* pair -+ out_sig_bt[argc++] = T_INT; -+ out_sig_bt[argc++] = T_ADDRESS; -+ Symbol* atype = ss.as_symbol(CHECK_NULL); -+ const char* at = atype->as_C_string(); -+ if (strlen(at) == 2) { -+ assert(at[0] == '[', "must be"); -+ switch (at[1]) { -+ case 'B': in_elem_bt[i] = T_BYTE; break; -+ case 'C': in_elem_bt[i] = T_CHAR; break; -+ case 'D': in_elem_bt[i] = T_DOUBLE; break; -+ case 'F': in_elem_bt[i] = T_FLOAT; break; -+ case 'I': in_elem_bt[i] = T_INT; break; -+ case 'J': in_elem_bt[i] = T_LONG; break; -+ case 'S': in_elem_bt[i] = T_SHORT; break; -+ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; -+ default: ShouldNotReachHere(); -+ } -+ } -+ } else { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ in_elem_bt[i] = T_VOID; -+ } -+ if (in_sig_bt[i] != T_VOID) { -+ assert(in_sig_bt[i] == ss.type(), "must match"); -+ ss.next(); -+ } -+ } -+ } + -+ // Now figure out where the args must be stored and how much stack space -+ // they require. -+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++instruct vlsrI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Compute framesize for the wrapper. We need to handlize all oops in -+ // incoming registers + -+ // Calculate the total number of stack slots we will need. 
++instruct vlsrL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // First count the abi requirement plus all of the outgoing args -+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVB src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerByte) con = BitsPerByte - 1; ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now the space for the inbound oop handle area -+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers -+ if (is_critical_native) { -+ // Critical natives may have to call out so they need a save area -+ // for register arguments. -+ int double_slots = 0; -+ int single_slots = 0; -+ for ( int i = 0; i < total_in_args; i++) { -+ if (in_regs[i].first()->is_Register()) { -+ const Register reg = in_regs[i].first()->as_Register(); -+ switch (in_sig_bt[i]) { -+ case T_BOOLEAN: -+ case T_BYTE: -+ case T_SHORT: -+ case T_CHAR: -+ case T_INT: single_slots++; break; -+ case T_ARRAY: // specific to LP64 (7145024) -+ case T_LONG: double_slots++; break; -+ default: ShouldNotReachHere(); -+ } -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ ShouldNotReachHere(); -+ } ++instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVS src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; + } -+ total_save_slots = double_slots * 2 + single_slots; -+ // align the save area -+ if (double_slots != 0) { -+ stack_slots = align_up(stack_slots, 2); ++ if (con >= BitsPerShort) con = BitsPerShort - 1; ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVI src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; + } -+ } ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int oop_handle_offset = stack_slots; -+ stack_slots += total_save_slots; ++instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi 
$dst, $src, $shift\t#@vasrL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now any space we need for handlizing a klass if static method ++instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVB src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerByte) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int klass_slot_offset = 0; -+ int klass_offset = -1; -+ int lock_slot_offset = 0; -+ bool is_static = false; ++instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVS src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerShort) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (method->is_static()) { -+ klass_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; -+ is_static = true; -+ } ++instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVI src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Plus a lock if needed ++instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (method->is_synchronized()) { -+ lock_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ } ++instruct 
vlslB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVB src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con >= BitsPerByte) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now a place (+2) to save return values or temp during shuffling -+ // + 4 for return address (which we own) and saved fp -+ stack_slots += 6; ++instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVS src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con >= BitsPerShort) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Ok The space we have allocated will look like: -+ // -+ // -+ // FP-> | | -+ // | 2 slots (ra) | -+ // | 2 slots (fp) | -+ // |---------------------| -+ // | 2 slots for moves | -+ // |---------------------| -+ // | lock box (if sync) | -+ // |---------------------| <- lock_slot_offset -+ // | klass (if static) | -+ // |---------------------| <- klass_slot_offset -+ // | oopHandle area | -+ // |---------------------| <- oop_handle_offset (8 java arg registers) -+ // | outbound memory | -+ // | based arguments | -+ // | | -+ // |---------------------| -+ // | | -+ // SP-> | out_preserved_slots | -+ // -+ // ++instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVI src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + ++instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now compute actual number of stack words we need rounding to make -+ // stack properly aligned. 
-+ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || ++ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // First thing make an ic check to see if we should even be here ++instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // We are free to use all registers as temps without saving them and -+ // restoring them except fp. fp is the only callee save register -+ // as far as the interpreter and the compiler(s) are concerned. ++instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + ++// vector sqrt + -+ const Register ic_reg = t1; -+ const Register receiver = j_rarg0; ++instruct vsqrtF(vReg dst, vReg src) %{ ++ match(Set dst (SqrtVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ Label hit; -+ Label exception_pending; ++instruct vsqrtD(vReg dst, vReg src) %{ ++ match(Set dst (SqrtVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ assert_different_registers(ic_reg, receiver, t0); -+ __ verify_oop(receiver); -+ __ cmp_klass(receiver, ic_reg, t0, hit); ++// vector sub + -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++instruct vsubB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Verified entry point must be aligned -+ __ align(8); ++instruct vsubS(vReg dst, vReg src1, vReg src2) %{ 
++ match(Set dst (SubVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(hit); ++instruct vsubI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int vep_offset = ((intptr_t)__ pc()) - start; ++instruct vsubL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. -+ __ nop(); ++instruct vsubF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Generate stack overflow check -+ if (UseStackBanging) { -+ __ bang_stack_with_offset(checked_cast(JavaThread::stack_shadow_zone_size())); -+ } else { -+ Unimplemented(); -+ } ++instruct vsubD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Generate a new frame for the wrapper. -+ __ enter(); -+ // -2 because return address is already present and so is saved fp -+ __ sub(sp, sp, stack_size - 2 * wordSize); ++instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, vReg_V1 v1, ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ // Frame is now completed as far as size and linkage. -+ int frame_complete = ((intptr_t)__ pc()) - start; ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals_v($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ // We use x18 as the oop handle for the receiver/klass -+ // It is callee save so it survives the call to native ++instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, vReg_V1 v1, ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ const Register oop_handle_reg = x18; ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals_v($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ if (is_critical_native) { -+ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, -+ oop_handle_offset, oop_maps, in_regs, in_sig_bt); -+ } ++instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ // -+ // We immediately shuffle the arguments so that any vm call we have to -+ // make from here on out (sync slow path, jvmti, etc.) we will have -+ // captured the oops from our caller and have a valid oopMap for -+ // them. ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals_v($ary1$$Register, $ary2$$Register, ++ $result$$Register, $tmp$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ // ----------------- -+ // The Grand Shuffle ++instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ // The Java calling convention is either equal (linux) or denser (win64) than the -+ // c calling convention. However the because of the jni_env argument the c calling -+ // convention always has at least one more (and two for static) arguments than Java. -+ // Therefore if we move the args from java -> c backwards then we will never have -+ // a register->register conflict and we don't have to build a dependency graph -+ // and figure out how to break any cycles. -+ // -+ -+ // Record esp-based slot for receiver on stack for non-static methods -+ int receiver_offset = -1; -+ -+ // This is a trick. We double the stack slots so we can claim -+ // the oops in the caller's frame. Since we are sure to have -+ // more args than the caller doubling is enough to make -+ // sure we can capture all the incoming oop args from the -+ // caller. 
-+ // -+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); -+ assert_cond(map != NULL); ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals_v($ary1$$Register, $ary2$$Register, ++ $result$$Register, $tmp$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ int float_args = 0; -+ int int_args = 0; ++instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+#ifdef ASSERT -+ bool reg_destroyed[RegisterImpl::number_of_registers]; -+ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; -+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { -+ reg_destroyed[r] = false; -+ } -+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { -+ freg_destroyed[f] = false; -+ } ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+#endif /* ASSERT */ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ // This may iterate in two different directions depending on the -+ // kind of native it is. The reason is that for regular JNI natives -+ // the incoming and outgoing registers are offset upwards and for -+ // critical natives they are offset down. 
-+ GrowableArray arg_order(2 * total_in_args); -+ VMRegPair tmp_vmreg; -+ tmp_vmreg.set2(x9->as_VMReg()); ++instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ if (!is_critical_native) { -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } -+ } else { -+ // Compute a valid move order, using tmp_vmreg to break any cycles -+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); -+ } ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ int temploc = -1; -+ for (int ai = 0; ai < arg_order.length(); ai += 2) { -+ int i = arg_order.at(ai); -+ int c_arg = arg_order.at(ai + 1); -+ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ if (c_arg == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // This arg needs to be moved to a temporary -+ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); -+ in_regs[i] = tmp_vmreg; -+ temploc = i; -+ continue; -+ } else if (i == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // Read from the temporary location -+ assert(temploc != -1, "must be valid"); -+ i = temploc; -+ temploc = -1; -+ } -+#ifdef ASSERT -+ if (in_regs[i].first()->is_Register()) { -+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); -+ } -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif /* ASSERT */ -+ switch (in_sig_bt[i]) { -+ case T_ARRAY: -+ if (is_critical_native) { -+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); -+ c_arg++; -+#ifdef ASSERT -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif -+ int_args++; -+ break; -+ } -+ // no break -+ case T_OBJECT: -+ assert(!is_critical_native, "no oop arguments"); -+ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); -+ int_args++; -+ break; -+ case T_VOID: -+ break; ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ case T_FLOAT: -+ __ float_move(in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++// fast byte[] to char[] inflation ++instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); + -+ case T_DOUBLE: -+ assert( i + 1 < total_in_args && -+ in_sig_bt[i + 1] == T_VOID && -+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ __ double_move(in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ format %{ "String Inflate $src,$dst" %} ++ ins_encode %{ ++ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ case T_LONG : -+ __ long_move(in_regs[i], out_regs[c_arg]); -+ int_args++; -+ break; ++// encode char[] to byte[] in ISO_8859_1 ++instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, ++ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + -+ case T_ADDRESS: -+ assert(false, "found T_ADDRESS in java args"); -+ break; ++ format %{ "Encode array $src,$dst,$len -> $result" %} ++ ins_encode %{ ++ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp$$Register); ++ %} ++ ins_pipe( pipe_class_memory ); ++%} + -+ default: -+ __ move32_64(in_regs[i], out_regs[c_arg]); -+ int_args++; -+ } -+ } ++// fast char[] to byte[] compression ++instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, ++ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + -+ // point c_arg at the first arg that is already loaded in case we -+ // need to spill before we call out -+ int c_arg = total_c_args - total_in_args; ++ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} ++ ins_encode %{ ++ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} + -+ // Pre-load a static method's oop into c_rarg1. 
-+ if (method->is_static() && !is_critical_native) { ++instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (CountPositives ary len)); ++ effect(USE_KILL ary, USE_KILL len, TEMP tmp); + -+ // load oop into a register -+ __ movoop(c_rarg1, -+ JNIHandles::make_local(method->method_holder()->java_mirror()), -+ /*immediate*/true); ++ format %{ "count positives byte[] $ary, $len -> $result" %} ++ ins_encode %{ ++ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); ++ %} + -+ // Now handlize the static class mirror it's known not-null. -+ __ sd(c_rarg1, Address(sp, klass_offset)); -+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ins_pipe(pipe_slow); ++%} + -+ // Now get the handle -+ __ la(c_rarg1, Address(sp, klass_offset)); -+ // and protect the arg if we must spill -+ c_arg--; -+ } ++instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) ++%{ ++ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + -+ // Change state to native (we save the return address in the thread, since it might not -+ // be pushed on the stack when we do a stack traversal). -+ // We use the same pc/oopMap repeatedly when we call out ++ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} + -+ Label native_return; -+ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ ins_encode %{ ++ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ false /* isL */); ++ %} + -+ Label dtrace_method_entry, dtrace_method_entry_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ addw(t0, t0, zr); -+ __ bnez(t0, dtrace_method_entry); -+ __ bind(dtrace_method_entry_done); -+ } ++ ins_pipe(pipe_class_memory); ++%} + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ } ++instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) ++%{ ++ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + -+ // Lock a synchronized method ++ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} + -+ // Register definitions used by locking and unlocking ++ ins_encode %{ ++ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ true /* isL */); ++ %} + -+ const Register swap_reg = x10; -+ const Register obj_reg = x9; // Will contain 
the oop -+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) -+ const Register old_hdr = x30; // value of old header at unlock time -+ const Register tmp = ra; ++ ins_pipe(pipe_class_memory); ++%} + -+ Label slow_path_lock; -+ Label lock_done; ++// clearing of an array ++instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, ++ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) ++%{ ++ predicate(UseRVV); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); + -+ if (method->is_synchronized()) { -+ assert(!is_critical_native, "unhandled"); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + -+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ins_encode %{ ++ __ clear_array_v($base$$Register, $cnt$$Register); ++ %} + -+ // Get the handle (the 2nd argument) -+ __ mv(oop_handle_reg, c_rarg1); ++ ins_pipe(pipe_class_memory); ++%} +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +new file mode 100644 +index 00000000000..f85d4b25a76 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -0,0 +1,2761 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // Get address of the box ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "adfiles/ad_riscv.hpp" ++#include "opto/runtime.hpp" ++#endif + -+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++#define __ masm-> + -+ // Load the oop from the handle -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + -+ if (UseBiasedLocking) { -+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); -+ } ++class SimpleRuntimeFrame { ++public: + -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that fp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++ // we don't expect any arg reg save area so riscv asserts that ++ // frame::arg_reg_save_area_bytes == 0 ++ fp_off = 0, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++class RegisterSaver { ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ ~RegisterSaver() {} ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); + -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ // Offsets into the register save area ++ // Used by deoptimization when it is managing result register ++ // values on its own ++ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) ++ // |---v0---|<---SP ++ // |---v1---|save vectors only in generate_handler_blob ++ // |-- .. --| ++ // |---v31--|----- ++ // |---f0---| ++ // |---f1---| ++ // | .. | ++ // |---f31--| ++ // |---reserved slot for stack alignment---| ++ // |---x5---| ++ // | x6 | ++ // |---.. 
--| ++ // |---x31--| ++ // |---fp---| ++ // |---ra---| ++ int v0_offset_in_bytes(void) { return 0; } ++ int f0_offset_in_bytes(void) { ++ int f0_offset = 0; ++#ifdef COMPILER2 ++ if (_save_vectors) { ++ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * ++ BytesPerInt; + } -+ -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. -+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ -+ // Slow path will re-enter here -+ -+ __ bind(lock_done); ++#endif ++ return f0_offset; + } -+ -+ -+ // Finally just about ready to make the JNI call -+ -+ // get JNIEnv* which is first argument to native -+ if (!is_critical_native) { -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); ++ int reserved_slot_offset_in_bytes(void) { ++ return f0_offset_in_bytes() + ++ FloatRegisterImpl::max_slots_per_register * ++ FloatRegisterImpl::number_of_registers * ++ BytesPerInt; + } + -+ // Now set thread in native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ -+ __ rt_call(native_func); ++ int reg_offset_in_bytes(Register r) { ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ } + -+ __ bind(native_return); ++ int freg_offset_in_bytes(FloatRegister f) { ++ return f0_offset_in_bytes() + f->encoding() * wordSize; ++ } + -+ intptr_t return_pc = (intptr_t) __ pc(); -+ oop_maps->add_gc_map(return_pc - start, map); ++ int ra_offset_in_bytes(void) { ++ return reserved_slot_offset_in_bytes() + ++ (RegisterImpl::number_of_registers - 3) * ++ RegisterImpl::max_slots_per_register * ++ BytesPerInt; ++ } ++}; + -+ // Unpack native results. -+ if(ret_type != T_OBJECT && ret_type != T_ARRAY) { -+ __ cast_primitive_type(ret_type, x10); ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ int vector_size_in_bytes = 0; ++ int vector_size_in_slots = 0; ++#ifdef COMPILER2 ++ if (_save_vectors) { ++ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); ++ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); + } ++#endif + -+ // Switch thread to "native transition" state before reading the synchronization state. -+ // This additional state is necessary because reading and testing the synchronization -+ // state is not atomic w.r.t. GC, as this scenario demonstrates: -+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. -+ // VM thread changes sync state to synchronizing and suspends threads for GC. -+ // Thread A is resumed to finish this native method, but doesn't block here since it -+ // didn't see any synchronization is progress, and escapes. 
-+ __ mv(t0, _thread_in_native_trans); ++ assert_cond(masm != NULL && total_frame_words != NULL); ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; + -+ if(os::is_MP()) { -+ if (UseMembar) { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ // Save Integer, Float and Vector registers. ++ __ enter(); ++ __ push_CPU_state(_save_vectors, vector_size_in_bytes); + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_maps != NULL && oop_map != NULL); + -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. 
-+ __ serialize_memory(xthread, x12, t0); ++ int sp_offset_in_slots = 0; ++ int step_in_slots = 0; ++ if (_save_vectors) { ++ step_in_slots = vector_size_in_slots; ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ VectorRegister r = as_VectorRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } -+ } else { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + } + -+ // check for safepoint operation in progress and/or pending suspend requests -+ Label safepoint_in_progress, safepoint_in_progress_done; -+ { -+ __ safepoint_poll_acquire(safepoint_in_progress); -+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ bnez(t0, safepoint_in_progress); -+ __ bind(safepoint_in_progress_done); ++ step_in_slots = FloatRegisterImpl::max_slots_per_register; ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ FloatRegister r = as_FloatRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } + -+ // change thread state -+ Label after_transition; -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_Java); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ __ bind(after_transition); -+ -+ Label reguard; -+ Label reguard_done; -+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); -+ __ beq(t0, t1, reguard); -+ __ bind(reguard_done); -+ -+ // native result if any is live ++ step_in_slots = RegisterImpl::max_slots_per_register; ++ // skip the slot reserved for alignment, see MacroAssembler::push_reg; ++ // also skip x5 ~ x6 on the stack because they are caller-saved registers. ++ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; ++ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. ++ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ Register r = as_Register(i); ++ if (r != xthread) { ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); ++ } ++ } + -+ // Unlock -+ Label unlock_done; -+ Label slow_path_unlock; -+ if (method->is_synchronized()) { ++ return oop_map; ++} + -+ // Get locked oop from the handle we passed to jni -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ assert_cond(masm != NULL); ++#ifdef COMPILER2 ++ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); ++#else ++ __ pop_CPU_state(_save_vectors); ++#endif ++ __ leave(); ++} + -+ Label done; ++// Is vector's size (in bytes) bigger than a size saved by default? ++// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. ++bool SharedRuntime::is_wide_vector(int size) { ++ return UseRVV; ++} + -+ if (UseBiasedLocking) { -+ __ biased_locking_exit(obj_reg, old_hdr, done); -+ } ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. 
++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} + -+ // Simple recursive lock? ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} + -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. + -+ // Must save x10 if if it is live now because cmpxchg must use it -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++// Note: the INPUTS in sig_bt are in units of Java argument words, ++// which are 64-bit. The OUTPUTS are in 32-bit units. + ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni ++// methods with small numbers of arguments without having to shuffle ++// the arguments at all. Since we control the java ABI we ought to at ++// least get some advantage out of it. + -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed) { ++ // Create the mapping between argument positions and ++ // registers. 
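The assignment rule implemented here is small enough to check by hand: the first eight integer-like arguments go to j_rarg0..j_rarg7, the first eight float/double arguments to j_farg0..j_farg7, and anything beyond that falls through to 4-byte stack slots that are always consumed in pairs, so every stack argument occupies a full 64-bit word. A minimal standalone sketch of that rule (not HotSpot code; ArgKind and Placement are simplified stand-ins for BasicType and VMRegPair, and the 8 + 8 register counts are taken from the arrays that follow):

#include <cstdio>

enum class ArgKind { Int, Long, Float, Double, Oop };

struct Placement {
  bool on_stack;
  int  index;   // register number, or first 4-byte stack slot
};

// Mirrors the bookkeeping of the loop below: separate counters per register
// kind, spilling to the stack in 2-slot (8-byte) units once registers run out.
static Placement place(ArgKind k, int& int_args, int& fp_args, int& stk_args) {
  const int n_int_regs = 8, n_fp_regs = 8;
  const bool is_fp = (k == ArgKind::Float || k == ArgKind::Double);
  int& used = is_fp ? fp_args : int_args;
  const int limit = is_fp ? n_fp_regs : n_int_regs;
  if (used < limit) {
    return { false, used++ };          // passed in a register
  }
  Placement p = { true, stk_args };    // spilled to the outgoing area
  stk_args += 2;
  return p;
}

int main() {
  ArgKind sig[] = { ArgKind::Oop, ArgKind::Long, ArgKind::Double, ArgKind::Int };
  int int_args = 0, fp_args = 0, stk_args = 0;
  for (ArgKind k : sig) {
    Placement p = place(k, int_args, fp_args, stk_args);
    std::printf("%s %d\n", p.on_stack ? "stack slot" : "register #", p.index);
  }
  return 0;
}

With only four arguments everything lands in registers (integer registers 0, 1 and 2 for the oop, the long and the int; float register 0 for the double), which is the common case this shifted ABI is meant to keep cheap.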
++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, ++ j_rarg4, j_rarg5, j_rarg6, j_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, j_farg6, j_farg7 ++ }; + -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+ // slow path re-enters here -+ __ bind(unlock_done); -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ restore_native_result(masm, ret_type, stack_slots); ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ -+ __ bind(done); -+ } -+ -+ Label dtrace_method_exit, dtrace_method_exit_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ bnez(t0, dtrace_method_exit); -+ __ bind(dtrace_method_exit_done); + } + -+ __ reset_last_Java_frame(false); ++ return align_up(stk_args, 2); ++} + -+ // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { -+ __ resolve_jobject(x10, xthread, t1); -+ } ++// Patch the callers callsite with entry to compiled code if it exists. 
++static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); ++ Label L; ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, L); + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); -+ } ++ __ enter(); ++ __ push_CPU_state(); + -+ if (!is_critical_native) { -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ } ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach + -+ __ leave(); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif + -+ if (!is_critical_native) { -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); -+ } ++ __ mv(c_rarg0, xmethod); ++ __ mv(c_rarg1, ra); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); ++ __ jalr(x1, t0, offset); + -+ // We're done -+ __ ret(); ++ // Explicit fence.i required because fixup_callers_callsite may change the code ++ // stream. ++ __ safepoint_ifence(); + -+ // Unexpected paths are out of line and go here ++ __ pop_CPU_state(); ++ // restore sp ++ __ leave(); ++ __ bind(L); ++} + -+ if (!is_critical_native) { -+ // forward the exception -+ __ bind(exception_pending); ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ patch_callers_callsite(masm); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ } ++ __ bind(skip_fixup); + -+ // Slow path locking & unlocking -+ if (method->is_synchronized()) { ++ int words_pushed = 0; + -+ __ block_comment("Slow path lock {"); -+ __ bind(slow_path_lock); ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. + -+ // has last_Java_frame setup. 
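Concretely, for a signature the compiler presents as T_LONG, T_VOID, T_OBJECT, T_BOOLEAN, the formula above puts the long's 64-bit payload in the T_VOID slot at the lower offset and leaves the nominal T_LONG slot unused (debug builds fill it with a junk pattern). A standalone sketch (not HotSpot code; the 8-byte slot size is an assumed 64-bit Interpreter::stackElementSize) that just evaluates those offsets:

#include <cstdio>
#include <cstring>

int main() {
  const int slot = 8;   // assumed Interpreter::stackElementSize on a 64-bit VM
  const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOLEAN" };
  const int total = sizeof(sig) / sizeof(sig[0]);

  for (int i = 0; i < total; i++) {
    if (std::strcmp(sig[i], "T_VOID") == 0) continue;   // second half of the long
    int st_off   = (total - i - 1) * slot;              // same formula as above
    int next_off = st_off - slot;
    bool wide = (i + 1 < total) && std::strcmp(sig[i + 1], "T_VOID") == 0;
    // Longs/doubles store their payload at next_off; everything else at st_off.
    std::printf("%-9s payload at sp+%d\n", sig[i], wide ? next_off : st_off);
  }
  return 0;
}

This reports the long at sp+16 and the object and boolean at sp+8 and sp+0, which are the offsets the stores below use once sp has been dropped by extraspace.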
No exceptions so do vanilla call not call_VM -+ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ int extraspace = total_args_passed * Interpreter::stackElementSize; + -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mv(x30, sp); + -+ __ mv(c_rarg0, obj_reg); -+ __ mv(c_rarg1, lock_reg); -+ __ mv(c_rarg2, xthread); ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2 * wordSize); + -+ // Not a leaf but we have last_Java_frame setup as we want -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); -+ restore_args(masm, total_c_args, c_arg, out_regs); ++ if (extraspace) { ++ __ sub(sp, sp, extraspace); ++ } + -+#ifdef ASSERT -+ { Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit from monitorenter"); -+ __ bind(L); ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; + } -+#endif -+ __ j(lock_done); + -+ __ block_comment("} Slow path lock"); ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; + -+ __ block_comment("Slow path unlock {"); -+ __ bind(slow_path_unlock); ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a Java long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
+ -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { -+ save_native_result(masm, ret_type, stack_slots); ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; + } ++ if (r_1->is_stack()) { ++ // memory to memory use t0 ++ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size ++ + extraspace ++ + words_pushed * wordSize); ++ if (!r_2->is_valid()) { ++ __ lwu(t0, Address(sp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } else { ++ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + -+ __ mv(c_rarg2, xthread); -+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ mv(c_rarg0, obj_reg); ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ sd(t0, Address(sp, next_off), /*temp register*/esp); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaaaul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ } else { ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ __ sd(r, Address(sp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // long/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaabul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ sd(r, Address(sp, next_off)); ++ } else { ++ __ sd(r, Address(sp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaacul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); ++ } ++ } ++ } + -+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) -+ // NOTE that obj_reg == x9 currently -+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ mv(esp, sp); // Interp expects args on caller's expression stack + -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); ++ __ jr(t0); ++} + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); -+ __ bind(L); -+ } -+#endif /* ASSERT */ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ // Cut-out for having no stack args. 
++ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; ++ if (comp_args_on_stack != 0) { ++ __ sub(t0, sp, comp_words_on_stack * wordSize); ++ __ andi(sp, t0, -16); ++ } + -+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { -+ restore_native_result(masm, ret_type, stack_slots); ++ // Now generate the shuffle code. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; + } -+ __ j(unlock_done); + -+ __ block_comment("} Slow path unlock"); ++ // Pick up 0, 1 or 2 words from SP+offset. + -+ } // synchronized ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; + -+ // SLOW PATH Reguard the stack if needed ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to account for return address ) ++ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; ++ if (!r_2->is_valid()) { ++ __ lw(t0, Address(esp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address + -+ __ bind(reguard); -+ save_native_result(masm, ret_type, stack_slots); -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ restore_native_result(masm, ret_type, stack_slots); -+ // and continue -+ __ j(reguard_done); ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ __ ld(t0, Address(esp, offset)); ++ // st_off is LSW (i.e. reg.first()) ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. + -+ // SLOW PATH safepoint -+ { -+ __ block_comment("safepoint {"); -+ __ bind(safepoint_in_progress); ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; + -+ // Don't use call_VM as it will see a possible pending exception and forward it -+ // and never return here preventing us from clearing _last_native_pc down below. 
-+ // -+ save_native_result(masm, ret_type, stack_slots); -+ __ mv(c_rarg0, xthread); -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ int32_t offset = 0; -+ if (!is_critical_native) { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ // this can be a misaligned move ++ __ ld(r, Address(esp, offset)); ++ } else { ++ // sign extend and use a full word? ++ __ lw(r, Address(esp, ld_off)); ++ } + } else { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); -+ } -+ __ jalr(x1, t0, offset); -+ // Restore any method result value -+ restore_native_result(masm, ret_type, stack_slots); -+ -+ if (is_critical_native) { -+ // The call above performed the transition to thread_in_Java so -+ // skip the transition logic above. -+ __ j(after_transition); ++ if (!r_2->is_valid()) { ++ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); ++ } else { ++ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } + } -+ -+ __ j(safepoint_in_progress_done); -+ __ block_comment("} safepoint"); + } + -+ // SLOW PATH dtrace support -+ { -+ __ block_comment("dtrace entry {"); -+ __ bind(dtrace_method_entry); ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. + -+ // We have all of the arguments setup at this point. We must not touch any register -+ // argument registers at this point (what if we save/restore them there are no oop? ++ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); + -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ __ j(dtrace_method_entry_done); -+ __ block_comment("} dtrace entry"); -+ } -+ -+ { -+ __ block_comment("dtrace exit {"); -+ __ bind(dtrace_method_exit); -+ save_native_result(masm, ret_type, stack_slots); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ restore_native_result(masm, ret_type, stack_slots); -+ __ j(dtrace_method_exit_done); -+ __ block_comment("} dtrace exit"); -+ } ++ __ jr(t1); ++} + -+ __ flush(); ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + -+ nmethod *nm = nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), -+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), -+ oop_maps); -+ assert(nm != NULL, "create native nmethod fail!"); -+ if (is_critical_native) { -+ nm->set_lazy_critical_native(true); -+ } ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; + -+ return nm; -+} ++ Label ok; + -+// this function returns the adjust size (in number of words) to a c2i adapter -+// activation for use during deoptimization -+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { -+ assert(callee_locals >= callee_parameters, -+ "test and remove; got more parms than locals"); -+ if (callee_locals < callee_parameters) { -+ return 0; // No adjustment for negative locals -+ } -+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; -+ // diff is counted in stack words -+ return align_up(diff, 2); -+} ++ const Register holder = t1; ++ const Register receiver = j_rarg0; ++ const Register tmp = t2; // A call-clobbered register not used for arg passing + -+//------------------------------generate_deopt_blob---------------------------- -+void SharedRuntime::generate_deopt_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ int pad = 0; -+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ int frame_size_in_words = -1; -+ OopMap* map = NULL; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. ++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). + -+ // ------------- -+ // This code enters when returning to a de-optimized nmethod. A return -+ // address has been pushed on the the stack, and return values are in -+ // registers. -+ // If we are doing a normal deopt then we were called from the patched -+ // nmethod from the point we returned to the nmethod. So the return -+ // address on the stack is wrong by NativeCall::instruction_size -+ // We will adjust the value so it looks like we have the original return -+ // address on the stack (like when we eagerly deoptimized). -+ // In the case of an exception pending when deoptimizing, we enter -+ // with a return address on the stack that points after the call we patched -+ // into the exception handler. We have the following register state from, -+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). -+ // x10: exception oop -+ // x9: exception handler -+ // x13: throwing pc -+ // So in this case we simply jam x13 into the useless return address and -+ // the stack looks just like we want. -+ // -+ // At this point we need to de-opt. We save the argument return -+ // registers. We call the first C routine, fetch_unroll_info(). 
This -+ // routine captures the return values and returns a structure which -+ // describes the current frame size and the sizes of all replacement frames. -+ // The current frame is compiled code and may contain many inlined -+ // functions, each with their own JVM state. We pop the current frame, then -+ // push all the new frames. Then we call the C routine unpack_frames() to -+ // populate these frames. Finally unpack_frames() returns us the new target -+ // address. Notice that callee-save registers are BLOWN here; they have -+ // already been captured in the vframeArray at the time the return PC was -+ // patched. -+ address start = __ pc(); -+ Label cont; ++ { ++ __ block_comment("c2i_unverified_entry {"); ++ __ load_klass(t0, receiver); ++ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); ++ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ beq(t0, tmp, ok); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+ // Prolog for non exception case! ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, skip_fixup); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ block_comment("} c2i_unverified_entry"); ++ } + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ address c2i_entry = __ pc(); + -+ // Normal deoptimization. Save exec mode for unpack_frames. -+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved -+ __ j(cont); ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; + -+ int reexecute_offset = __ pc() - start; ++ { // Bypass the barrier for non-static methods ++ __ lwu(t0, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t1, t0, JVM_ACC_STATIC); ++ __ beqz(t1, L_skip_barrier); // non-static ++ } + -+ // Reexecute case -+ // return address is the pc describes what bci to do re-execute at ++ __ load_method_holder(t1, xmethod); ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + -+ // No need to update map as each call to save_live_registers will produce identical oopmap -+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } + -+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved -+ __ j(cont); ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->c2i_entry_barrier(masm); + -+ int exception_offset = __ pc() - start; ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + -+ // Prolog for exception case ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} + -+ // all registers are dead at this entry point, except for x10, and -+ // x13 which contain the exception oop and exception pc -+ // respectively. Set them in TLS and fall thru to the -+ // unpack_with_exception_in_tls entry point. 
++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; ++} + -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on riscv"); + -+ int exception_in_tls_offset = __ pc() - start; ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. + -+ // new implementation because exception oop is now passed in JavaThread ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, ++ c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5, c_farg6, c_farg7 ++ }; + -+ // Prolog for exception case -+ // All registers must be preserved because they might be used by LinearScan -+ // Exceptiop oop and throwing PC are passed in JavaThread -+ // tos: stack at point of call to method that threw the exception (i.e. only -+ // args are on the stack, no return address) ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+ // The return address pushed by save_live_registers will be patched -+ // later with the throwing pc. The correct value is not available -+ // now because loading it from memory would destroy registers. ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: // fall through ++ case T_METADATA: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+ // NB: The SP at this 
point must be the SP of the method that is -+ // being deoptimized. Deoptimization assumes that the frame created -+ // here by save_live_registers is immediately below the method's SP. -+ // This is a somewhat fragile mechanism. ++ return stk_args; ++} + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} + -+ // Now it is safe to overwrite any register ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); + -+ // Deopt during an exception. Save exec mode for unpack_frames. -+ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ // See if oop is NULL if it is we need no handle + -+ // load throwing pc from JavaThread and patch it as the return address -+ // of the current frame. 
Then clear the field in JavaThread ++ if (src.first()->is_stack()) { + -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } + -+#ifdef ASSERT -+ // verify that there is really an exception oop in JavaThread -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ verify_oop(x10); ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { + -+ // verify that there is no pending exception -+ Label no_pending_exception; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, no_pending_exception); -+ __ stop("must not have pending exception here"); -+ __ bind(no_pending_exception); -+#endif ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL + -+ __ bind(cont); ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } + -+ // Call C code. Need thread and this frame, but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. -+ // -+ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; + -+ // fetch_unroll_info needs to call last_java_frame(). ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, -+ JavaThread::last_Java_fp_offset())); -+ __ beqz(t0, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); ++ } + } -+#endif // ASSERT -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); + -+ // Need to have an oopmap that tells fetch_unroll_info where to -+ // find any register it might need. 
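// [Editorial note: illustrative sketch, not part of this patch.]
// object_move above never hands the native callee a raw oop: it stores the oop
// into a reserved, oop-map-visible stack slot and passes the slot's address as
// the handle, except that a NULL oop must become a NULL handle ("conditionally
// move a NULL"). The types below are stand-ins chosen for the sketch.
#include <cstdint>
#include <cstdio>

using oop     = std::intptr_t;  // stand-in: an oop is just a word here
using jhandle = oop*;           // a handle is the address of a slot holding the oop

jhandle make_handle(oop value, oop* slot) {
  *slot = value;                        // the slot is what the oop map records
  return value != 0 ? slot : nullptr;   // NULL oop -> NULL handle, not &slot
}

int main() {
  oop slot0 = 0, slot1 = 0;
  jhandle h1 = make_handle(0x1000, &slot0);  // non-null oop: callee sees &slot0
  jhandle h2 = make_handle(0,      &slot1);  // null oop: callee sees nullptr
  std::printf("h1=%p (slot at %p), h2=%p\n", (void*)h1, (void*)&slot0, (void*)h2);
  return 0;
}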
-+ oop_maps->add_gc_map(__ pc() - start, map); ++ // If arg is on the stack then place it otherwise it is already in correct reg. ++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} + -+ __ reset_last_Java_frame(false); ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} + -+ // Load UnrollBlock* into x15 -+ __ mv(x15, x10); ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} + -+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ Label noException; -+ __ mv(t0, Deoptimization::Unpack_exception); -+ __ bne(xcpool, t0, noException); // Was exception pending? 
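// [Editorial note: illustrative sketch, not part of this patch.]
// move32_64 above uses "addw dst, src, zr", which keeps only the low 32 bits and
// sign-extends them, matching the RV64 convention that Java ints travel as
// sign-extended 64-bit values; long_move copies the full 64 bits unchanged.
// A portable restatement of that difference:
#include <cstdint>
#include <cstdio>

std::int64_t move32_64(std::int64_t src) {        // what addw/lw effectively do
  return (std::int64_t)(std::int32_t)src;         // truncate to 32 bits, then sign-extend
}

std::int64_t long_move(std::int64_t src) {        // what mv/ld/sd do
  return src;
}

int main() {
  std::int64_t dirty = 0x12345678ffffffffLL;      // upper half holds garbage
  std::printf("move32_64 -> %lld\n", (long long)move32_64(dirty));            // -1
  std::printf("long_move -> 0x%llx\n", (unsigned long long)long_move(dirty)); // unchanged
  return 0;
}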
-+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} + -+ __ verify_oop(x10); ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fsw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fsd(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ sd(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} + -+ // Overwrite the result registers with the exception results. -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ flw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fld(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ld(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} + -+ __ bind(noException); ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ addi(sp, sp, -2 * wordSize); ++ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ } ++ } ++ __ push_reg(x, sp); ++} + -+ // Only register save data is on the stack. -+ // Now restore the result registers. Everything else is either dead -+ // or captured in the vframeArray. 
-+ reg_saver.restore_result_registers(masm); ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else { ++ ; ++ } ++ } ++ __ pop_reg(x, sp); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ ; ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ __ add(sp, sp, 2 * wordSize); ++ } ++ } ++} + -+ // All of the register save area has been popped of the stack. Only the -+ // return address remains. ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + -+ // Pop all the frames we must move/replace. -+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). -+ // -+ // Note: by leaving the return address of self-frame on the stack -+ // and using the size of frame 2 to adjust the stack -+ // when we are done the return to frame 3 will still be on the stack. -+ -+ // Pop deoptimized frame -+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, Address(sp, 0)); -+ __ ld(ra, Address(sp, wordSize)); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) -+ -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
-+ if (UseStackBanging) { -+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ const Register temp_reg = x9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } + } -+#endif -+ // Load address of array of frame pcs into x12 -+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++} + -+ // Load address of array of frame sizes into x14 -+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); + -+ // Load counter into x13 -+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = x9; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ } + -+ // Now adjust the caller's stack to make up for the extra locals -+ // but record the original sp so that we can save it in the skeletal interpreter -+ // frame and the stack walking of interpreter_sender will get the unextended sp -+ // value and not the "real" sp value. ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } + -+ const Register sender_sp = x16; ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
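// [Editorial note: illustrative sketch, not part of this patch.]
// The removed deoptimization code above walks the UnrollBlock's frame sizes and
// frame counter and pushes one skeletal interpreter frame per entry, threading
// the previous sp through as sender_sp. The arithmetic below models that loop
// with plain integers (the struct and field names are simplified stand-ins):
#include <cstdint>
#include <cstdio>
#include <vector>

struct UnrollBlock {                        // stand-in for Deoptimization::UnrollBlock
  std::vector<std::int64_t> frame_sizes;    // bytes per skeletal frame
};

std::int64_t push_skeletal_frames(const UnrollBlock& ub, std::int64_t sp) {
  const std::int64_t wordSize = 8;
  std::int64_t sender_sp = sp;
  for (size_t i = 0; i < ub.frame_sizes.size(); i++) {
    sp -= 2 * wordSize;                              // enter(): push pc and fp by hand
    sp -= ub.frame_sizes[i] - 2 * wordSize;          // prolog: reserve the rest of the frame
    std::printf("frame %zu: sp=0x%llx sender_sp=0x%llx\n",
                i, (unsigned long long)sp, (unsigned long long)sender_sp);
    sender_sp = sp;                                  // becomes the next frame's sender_sp
  }
  return sp;
}

int main() {
  UnrollBlock ub{{96, 128, 80}};            // three inlined frames to rebuild
  push_skeletal_frames(ub, /*sp=*/0x10000);
  return 0;
}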
++      fatal("receiver always in a register");
++      receiver_reg = x12;  // known to be free at this point
++      __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
++    } else {
++      // no data motion is needed
++      receiver_reg = r->as_Register();
++    }
++  }
 +
++  // Figure out which address we are really jumping to:
++  MethodHandles::generate_method_handle_dispatch(masm, iid,
++                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
++}
 +
++// ---------------------------------------------------------------------------
++// Generate a native wrapper for a given method. The method takes arguments
++// in the Java compiled code convention, marshals them to the native
++// convention (handlizes oops, etc), transitions to native, makes the call,
++// returns to java state (possibly blocking), unhandlizes any result and
++// returns.
++//
++// Critical native functions are a shorthand for the use of
++// GetPrimitiveArrayCritical and disallow the use of any other JNI
++// functions. The wrapper is expected to unpack the arguments before
++// passing them to the callee and perform checks before and after the
++// native call to ensure that the GCLocker
++// lock_critical/unlock_critical semantics are followed. Some other
++// parts of JNI setup are skipped, like the tear down of the JNI handle
++// block and the check for pending exceptions, since it's impossible for them
++// to be thrown.
++//
++// They are roughly structured like this:
++//    if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical()
++//    transition to thread_in_native
++//    unpack array arguments and call native entry point
++//    check for safepoint in progress
++//    check if any thread suspend flags are set
++//    call into JVM and possibly unlock the JNI critical
++//    if a GC was suppressed while in the critical native.
++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+ // Re-push self-frame -+ __ ld(ra, Address(x12)); -+ __ enter(); ++ // First instruction must be a nop as it may need to be patched on deoptimisation ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ address native_func = method->native_function(); ++ assert(native_func != NULL, "must have function"); + -+ // Allocate a full sized register save area. We subtract 2 because -+ // enter() just pushed 2 words -+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ intptr_t start = (intptr_t)__ pc(); + -+ // Restore frame locals after moving the frame -+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. -+ // -+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args + (method->is_static() ? 2 : 1); + -+ // Use fp because the frames look interpreted now -+ // Don't need the precise return PC here, just precise enough to point into this code blob. 
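// [Editorial note: illustrative sketch, not part of this patch.]
// The wrapper above rewrites the Java signature into a C signature by inserting
// the hidden JNIEnv* as arg[0] and, for static methods, the class mirror as
// arg[1] (hence total_c_args = total_in_args + (is_static ? 2 : 1)). A reduced
// BasicType set stands in for HotSpot's:
#include <cstdio>
#include <vector>

enum BasicType { T_ADDRESS, T_OBJECT, T_INT, T_LONG, T_VOID };

std::vector<BasicType> to_c_signature(const std::vector<BasicType>& in_sig, bool is_static) {
  std::vector<BasicType> out;
  out.push_back(T_ADDRESS);                // hidden JNIEnv*
  if (is_static) out.push_back(T_OBJECT);  // hidden class mirror for static methods
  out.insert(out.end(), in_sig.begin(), in_sig.end());
  return out;
}

int main() {
  // A long occupies two slots (the T_VOID half), followed by an Object parameter.
  std::vector<BasicType> java_sig = {T_LONG, T_VOID, T_OBJECT};
  std::printf("instance method: %zu C args\n", to_c_signature(java_sig, false).size()); // 4
  std::printf("static method:   %zu C args\n", to_c_signature(java_sig, true).size());  // 5
  return 0;
}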
-+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; + -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); // second arg: exec_mode -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ int argc = 0; ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, -+ new OopMap( frame_size_in_words, 0 )); ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + -+ // Collect return values -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers + -+ // Pop self-frame. -+ __ leave(); // Epilog ++ // Calculate the total number of stack slots we will need. + -+ // Jump to interpreter -+ __ ret(); ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + -+ // Make sure all code is generated -+ masm->flush(); ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + -+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); -+ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); -+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -+} ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; + -+uint SharedRuntime::out_preserve_stack_slots() { -+ return 0; -+} ++ // Now any space we need for handlizing a klass if static method + -+#ifdef COMPILER2 -+//------------------------------generate_uncommon_trap_blob-------------------- -+void SharedRuntime::generate_uncommon_trap_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } + -+ address start = __ pc(); ++ // Plus a lock if needed + -+ // Push self-frame. 
We get here with a return address in RA -+ // and sp should be 16 byte aligned -+ // push fp and retaddr by hand -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp, 0)); -+ // we don't expect an arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // compiler left unloaded_class_index in j_rarg0 move to where the -+ // runtime expects it. -+ __ addiw(c_rarg1, j_rarg0, 0); ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } + -+ // we need to set the past SP to the stack pointer of the stub frame -+ // and the pc to the address where this runtime call will return -+ // although actually any pc in this code blob will do). -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved fp ++ stack_slots += 6; + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // capture callee-saved registers as well as return values. ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (8 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | + // -+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) + // -+ // n.b. 3 gp args, 0 fp args, integral return type -+ -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ int32_t offset = 0; -+ __ la_patchable(t0, -+ RuntimeAddress(CAST_FROM_FN_PTR(address, -+ Deoptimization::uncommon_trap)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); + -+ // Set an oopmap for the call site -+ OopMapSet* oop_maps = new OopMapSet(); -+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); -+ assert_cond(oop_maps != NULL && map != NULL); + -+ // location of fp is known implicitly by the frame sender code ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); + -+ oop_maps->add_gc_map(__ pc() - start, map); ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; + -+ __ reset_last_Java_frame(false); ++ // First thing make an ic check to see if we should even be here + -+ // move UnrollBlock* into x14 -+ __ mv(x14, x10); ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. + -+#ifdef ASSERT -+ { Label L; -+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); -+ __ beq(t0, t1, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); -+ } -+#endif + -+ // Pop all the frames we must move/replace. 
-+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). ++ const Register ic_reg = t1; ++ const Register receiver = j_rarg0; + -+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! ++ Label hit; ++ Label exception_pending; + -+ // Pop deoptimized frame (int) -+ __ lwu(x12, Address(x14, -+ Deoptimization::UnrollBlock:: -+ size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, sp, 0); -+ __ ld(ra, sp, wordSize); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) frame ++ assert_different_registers(ic_reg, receiver, t0); ++ __ verify_oop(receiver); ++ __ cmp_klass(receiver, ic_reg, t0, hit); + -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. -+ if (UseStackBanging) { -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); -+ } -+#endif ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+ // Load address of array of frame pcs into x12 (address*) -+ __ ld(x12, Address(x14, -+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ // Verified entry point must be aligned ++ __ align(8); + -+ // Load address of array of frame sizes into x15 (intptr_t*) -+ __ ld(x15, Address(x14, -+ Deoptimization::UnrollBlock:: -+ frame_sizes_offset_in_bytes())); ++ __ bind(hit); + -+ // Counter -+ __ lwu(x13, Address(x14, -+ Deoptimization::UnrollBlock:: -+ number_of_frames_offset_in_bytes())); // (int) ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+ // Now adjust the caller's stack to make up for the extra locals but -+ // record the original sp so that we can save it in the skeletal -+ // interpreter frame and the stack walking of interpreter_sender -+ // will get the unextended sp value and not the "real" sp value. ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. 
++  __ nop();
 +
++  if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
++    Label L_skip_barrier;
++    __ mov_metadata(t1, method->method_holder()); // InstanceKlass*
++    __ clinit_barrier(t1, t0, &L_skip_barrier);
++    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 +
++    __ bind(L_skip_barrier);
++  }
 +
++  // Generate stack overflow check
++  __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size()));
 +
++  // Generate a new frame for the wrapper.
++  __ enter();
++  // -2 because return address is already present and so is saved fp
++  __ sub(sp, sp, stack_size - 2 * wordSize);
 +
++  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
++  assert_cond(bs != NULL);
++  bs->nmethod_entry_barrier(masm);
 +
++  // Frame is now completed as far as size and linkage.
++  int frame_complete = ((intptr_t)__ pc()) - start;
 +
++  // We use x18 as the oop handle for the receiver/klass
++  // It is callee save so it survives the call to native
 +
++  const Register oop_handle_reg = x18;
 +
++  //
++  // We immediately shuffle the arguments so that any vm call we have to
++  // make from here on out (sync slow path, jvmti, etc.)
we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. + -+ // Pop self-frame. -+ __ leave(); // Epilog ++ // ----------------- ++ // The Grand Shuffle + -+ // Jump to interpreter -+ __ ret(); ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. ++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // + -+ // Make sure all code is generated -+ masm->flush(); ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; + -+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, -+ SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ assert_cond(map != NULL); + -+//------------------------------generate_handler_blob------ -+// -+// Generate a special Compile2Runtime blob that saves all registers, -+// and setup oopmap. -+// -+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { -+ ResourceMark rm; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ int float_args = 0; ++ int int_args = 0; + -+ // Allocate space for the code. Setup code generation tools. -+ CodeBuffer buffer("handler_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } + -+ address start = __ pc(); -+ address call_pc = NULL; -+ int frame_size_in_words = -1; -+ bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++#endif /* ASSERT */ + -+ // Save Integer and Float registers. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // For JNI natives the incoming and outgoing registers are offset upwards. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(x9->as_VMReg()); + -+ // The following is basically a call_VM. However, we need the precise -+ // address of the call in order to generate an oopmap. Hence, we do all the -+ // work outselves. 
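// [Editorial note: illustrative sketch, not part of this patch.]
// The "Grand Shuffle" comment above argues that because every Java argument
// moves to a C slot at the same or a higher index (the hidden jni_env shifts
// everything up), copying from the last argument to the first never overwrites
// a source before it has been read. A tiny register-file model of that claim:
#include <cstdio>

void shuffle(int* regs, int nargs, bool backwards) {
  if (backwards) {
    for (int i = nargs - 1; i >= 0; i--) regs[i + 1] = regs[i];  // safe order
  } else {
    for (int i = 0; i < nargs; i++)      regs[i + 1] = regs[i];  // clobbers sources
  }
  regs[0] = -1;  // slot 0 now notionally holds the JNIEnv*
}

void show(const char* label, const int* regs, int n) {
  std::printf("%s:", label);
  for (int i = 0; i < n; i++) std::printf(" %d", regs[i]);
  std::printf("\n");
}

int main() {
  int fwd[5] = {10, 20, 30, 40, 0};
  int bwd[5] = {10, 20, 30, 40, 0};
  shuffle(fwd, 4, false);
  shuffle(bwd, 4, true);
  show("forward ", fwd, 5);   // -1 10 10 10 10 : later sources were overwritten
  show("backward", bwd, 5);   // -1 10 20 30 40 : every value survived, shifted up
  return 0;
}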
++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); ++ assert(c_arg != -1 && i != -1, "wrong order"); ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ int_args++; ++ break; ++ case T_VOID: ++ break; + -+ // The return address must always be correct so that frame constructor never -+ // sees an invalid pc. ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ if (!cause_return) { -+ // overwrite the return address pushed by save_live_registers -+ // Additionally, x18 is a callee-saved register so we can look at -+ // it later to determine if someone changed the return address for -+ // us! -+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ break; ++ ++ case T_ADDRESS: ++ assert(false, "found T_ADDRESS in java args"); ++ break; ++ ++ default: ++ move32_64(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ } + } + -+ // Do the call -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ int c_arg = total_c_args - total_in_args; + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ // Pre-load a static method's oop into c_rarg1. ++ if (method->is_static()) { + -+ oop_maps->add_gc_map( __ pc() - start, map); ++ // load oop into a register ++ __ movoop(c_rarg1, ++ JNIHandles::make_local(method->method_holder()->java_mirror()), ++ /*immediate*/true); + -+ Label noException; ++ // Now handlize the static class mirror it's known not-null. 
++ __ sd(c_rarg1, Address(sp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + -+ __ reset_last_Java_frame(false); ++ // Now get the handle ++ __ la(c_rarg1, Address(sp, klass_offset)); ++ // and protect the arg if we must spill ++ c_arg--; ++ } + -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a stack traversal). ++ // We use the same pc/oopMap repeatedly when we call out + -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, noException); ++ Label native_return; ++ __ set_last_Java_frame(sp, noreg, native_return, t0); + -+ // Exception pending ++ Label dtrace_method_entry, dtrace_method_entry_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ addw(t0, t0, zr); ++ __ bnez(t0, dtrace_method_entry); ++ __ bind(dtrace_method_entry_done); ++ } + -+ reg_saver.restore_live_registers(masm); ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } + -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // Lock a synchronized method + -+ // No exception case -+ __ bind(noException); ++ // Register definitions used by locking and unlocking + -+ Label no_adjust, bail; -+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { -+ // If our stashed return pc was modified by the runtime we avoid touching it -+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); -+ __ bne(x18, t0, no_adjust); ++ const Register swap_reg = x10; ++ const Register obj_reg = x9; // Will contain the oop ++ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = x30; // value of old header at unlock time ++ const Register tmp = ra; + -+#ifdef ASSERT -+ // Verify the correct encoding of the poll we're about to skip. -+ // See NativeInstruction::is_lwu_to_zr() -+ __ lwu(t0, Address(x18)); -+ __ andi(t1, t0, 0b0000011); -+ __ mv(t2, 0b0000011); -+ __ bne(t1, t2, bail); // 0-6:0b0000011 -+ __ srli(t1, t0, 7); -+ __ andi(t1, t1, 0b00000); -+ __ bnez(t1, bail); // 7-11:0b00000 -+ __ srli(t1, t0, 12); -+ __ andi(t1, t1, 0b110); -+ __ mv(t2, 0b110); -+ __ bne(t1, t2, bail); // 12-14:0b110 -+#endif -+ // Adjust return pc forward to step over the safepoint poll instruction -+ __ add(x18, x18, NativeInstruction::instruction_size); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ Label slow_path_lock; ++ Label lock_done; + -+ __ bind(no_adjust); -+ // Normal exit, restore registers and exit. 
++ if (method->is_synchronized()) { + -+ reg_saver.restore_live_registers(masm); -+ __ ret(); ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + -+#ifdef ASSERT -+ __ bind(bail); -+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -+#endif ++ // Get the handle (the 2nd argument) ++ __ mv(oop_handle_reg, c_rarg1); + -+ // Make sure all code is generated -+ masm->flush(); ++ // Get address of the box + -+ // Fill-out other meta info -+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -+} ++ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + -+// -+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -+// -+// Generate a stub that calls into vm to find out the proper destination -+// of a java call. All the argument registers are live at this point -+// but since this is generic code we don't know what they are and the caller -+// must do any gc of the args. -+// -+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ // Load the oop from the handle ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ // allocate space for the code -+ ResourceMark rm; ++ if (!UseHeavyMonitors) { ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + -+ CodeBuffer buffer(name, 1000, 512); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + -+ int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } + -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ } else { ++ __ j(slow_path_lock); ++ } + -+ int start = __ offset(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ } + -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ int frame_complete = __ offset(); ++ // Finally just about ready to make the JNI call + -+ { -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // get JNIEnv* which is first argument to native ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(destination), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // Now set thread in native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ ++ rt_call(masm, native_func); ++ ++ __ bind(native_return); ++ ++ intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(return_pc - start, map); ++ ++ // Unpack native results. ++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ __ cast_primitive_type(ret_type, x10); + } + -+ // Set an oopmap for the call site. -+ // We need this not only for callee-saved registers, but also for volatile -+ // registers that the compiler might be keeping live across a safepoint. ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; + -+ oop_maps->add_gc_map( __ offset() - start, map); ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ mv(t0, _thread_in_native_trans); + -+ // x10 contains the address we are going to jump to assuming no exception got installed ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // clear last_Java_sp -+ __ reset_last_Java_frame(false); -+ // check for pending exceptions -+ Label pending; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, pending); ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + -+ // get the returned Method* -+ __ get_vm_result_2(xmethod, xthread); -+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. 
++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. ++ ++ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ bnez(t0, safepoint_in_progress); ++ __ bind(safepoint_in_progress_done); ++ } + -+ // x10 is where we want to jump, overwrite t0 which is saved and temporary -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); -+ reg_saver.restore_live_registers(masm); ++ // change thread state ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_Java); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ __ bind(after_transition); + -+ // We are back the the original state on entry and ready to go. ++ Label reguard; ++ Label reguard_done; ++ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); ++ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ beq(t0, t1, reguard); ++ __ bind(reguard_done); + -+ __ jr(t0); ++ // native result if any is live + -+ // Pending exception after the safepoint ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { + -+ __ bind(pending); ++ // Get locked oop from the handle we passed to jni ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ reg_saver.restore_live_registers(masm); ++ Label done; + -+ // exception pending => remove activation and forward to exception handler ++ if (!UseHeavyMonitors) { ++ // Simple recursive lock? ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); ++ } + -+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // Must save x10 if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ // ------------- -+ // make sure all code is generated -+ masm->flush(); ++ if (!UseHeavyMonitors) { ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + -+ // return the blob -+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); -+} ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); ++ } else { ++ __ j(slow_path_unlock); ++ } + -+#ifdef COMPILER2 -+//------------------------------generate_exception_blob--------------------------- -+// creates exception blob at the end -+// Using exception blob, this code is jumped from a compiled method. -+// (see emit_exception_handler in riscv.ad file) -+// -+// Given an exception pc at a call we call into the runtime for the -+// handler in this method. This handler might merely restore state -+// (i.e. callee save registers) unwind the frame and jump to the -+// exception handler for the nmethod if there is no Java level handler -+// for the nmethod. -+// -+// This code is entered with a jmp. 
-+// -+// Arguments: -+// x10: exception oop -+// x13: exception pc -+// -+// Results: -+// x10: exception oop -+// x13: exception pc in caller -+// destination: exception handler of caller -+// -+// Note: the exception pc MUST be at a call (precise debug information) -+// Registers x10, x13, x12, x14, x15, t0 are not callee saved. -+// ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } + -+void OptoRuntime::generate_exception_blob() { -+ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ __ bind(done); ++ } + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ Label dtrace_method_exit, dtrace_method_exit_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ bnez(t0, dtrace_method_exit); ++ __ bind(dtrace_method_exit_done); ++ } + -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("exception_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ __ reset_last_Java_frame(false); + -+ // TODO check various assumptions made here -+ // -+ // make sure we do so before running this ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(x10, xthread, t1); ++ } + -+ address start = __ pc(); ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+ // push fp and retaddr by hand -+ // Exception pc is 'return address' for stack walker -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp)); -+ // there are no callee save registers and we don't expect an -+ // arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // Store exception in Thread object. We cannot pass any arguments to the -+ // handle_exception call, since we do not want to make any assumption -+ // about the size of the frame where the exception happened in. -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + -+ // This call does all the hard work. It checks if an exception handler -+ // exists in the method. -+ // If so, it returns the handler address. -+ // If not, it prepares for stack-unwinding, restoring the callee-save -+ // registers of the frame being removed. -+ // -+ // address OptoRuntime::handle_exception_C(JavaThread* thread) -+ // -+ // n.b. 1 gp arg, 0 fp args, integral return type ++ __ leave(); + -+ // the stack should always be aligned -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, noreg, the_pc, t0); -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); -+ __ jalr(x1, t0, offset); ++ // Any exception pending? 
++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + -+ // Set an oopmap for the call site. This oopmap will only be used if we -+ // are unwinding the stack. Hence, all locations will be dead. -+ // Callee-saved registers will be the same as the frame above (i.e., -+ // handle_exception_stub), since they were restored when we got the -+ // exception. ++ // We're done ++ __ ret(); + -+ OopMapSet* oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); ++ // Unexpected paths are out of line and go here + -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ // forward the exception ++ __ bind(exception_pending); + -+ __ reset_last_Java_frame(false); ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Restore callee-saved registers ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { + -+ // fp is an implicitly saved callee saved register (i.e. the calling -+ // convention will save restore it in prolog/epilog) Other than that -+ // there are no callee save registers now that adapter frames are gone. -+ // and we dont' expect an arg reg save area -+ __ ld(fp, Address(sp)); -+ __ ld(x13, Address(sp, wordSize)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ block_comment("Slow path lock {"); ++ __ bind(slow_path_lock); + -+ // x10: exception handler ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) + -+ // We have a handler in x10 (could be deopt blob). -+ __ mv(t0, x10); ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ __ mv(c_rarg0, obj_reg); ++ __ mv(c_rarg1, lock_reg); ++ __ mv(c_rarg2, xthread); ++ ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); + -+ // Get the exception oop -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ // Get the exception pc in case we are deoptimized -+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT -+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ { Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } +#endif -+ // Clear the exception oop so GC no longer processes it as a root. 
-+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ j(lock_done); + -+ // x10: exception oop -+ // t0: exception handler -+ // x14: exception pc -+ // Jump to handler ++ __ block_comment("} Slow path lock"); + -+ __ jr(t0); ++ __ block_comment("Slow path unlock {"); ++ __ bind(slow_path_unlock); + -+ // Make sure all code is generated -+ masm->flush(); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ // Set exception blob -+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -new file mode 100644 -index 000000000..c5b3b094c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -0,0 +1,3743 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ __ mv(c_rarg2, xthread); ++ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, obj_reg); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interpreter.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/instanceOop.hpp" -+#include "oops/method.hpp" -+#include "oops/objArrayKlass.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/align.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == x9 currently ++ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + -+// Declaration and definition of StubGenerator (no .hpp file). 
-+// For a more detailed description of the stub routine structure -+// see the comment in stubRoutines.hpp ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ + -+#undef __ -+#define __ _masm-> ++ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ j(unlock_done); + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ __ block_comment("} Slow path unlock"); + -+// Stub Code definitions ++ } // synchronized + -+class StubGenerator: public StubCodeGenerator { -+ private: ++ // SLOW PATH Reguard the stack if needed + -+#ifdef PRODUCT -+#define inc_counter_np(counter) ((void)0) -+#else -+ void inc_counter_np_(int& counter) { -+ __ la(t1, ExternalAddress((address)&counter)); -+ __ lwu(t0, Address(t1, 0)); -+ __ addiw(t0, t0, 1); -+ __ sw(t0, Address(t1, 0)); -+ } -+#define inc_counter_np(counter) \ -+ BLOCK_COMMENT("inc_counter " #counter); \ -+ inc_counter_np_(counter); -+#endif ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ j(reguard_done); + -+ // Call stubs are used to call Java from C -+ // -+ // Arguments: -+ // c_rarg0: call wrapper address address -+ // c_rarg1: result address -+ // c_rarg2: result type BasicType -+ // c_rarg3: method Method* -+ // c_rarg4: (interpreter) entry point address -+ // c_rarg5: parameters intptr_t* -+ // c_rarg6: parameter size (in words) int -+ // c_rarg7: thread Thread* -+ // -+ // There is no return from the stub itself as any Java result -+ // is written to result -+ // -+ // we save x1 (ra) as the return PC at the base of the frame and -+ // link x8 (fp) below it as the frame pointer installing sp (x2) -+ // into fp. -+ // -+ // we save x10-x17, which accounts for all the c arguments. -+ // -+ // TODO: strictly do we need to save them all? they are treated as -+ // volatile by C so could we omit saving the ones we are going to -+ // place in global registers (thread? method?) or those we only use -+ // during setup of the Java call? -+ // -+ // we don't need to save x5 which C uses as an indirect result location -+ // return register. -+ // -+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as -+ // volatile -+ // -+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary -+ // registers and C expects to be callee-save -+ // -+ // so the stub frame looks like this when we enter Java code -+ // -+ // [ return_from_Java ] <--- sp -+ // [ argument word n ] -+ // ... 
-+ // -34 [ argument word 1 ] -+ // -33 [ saved f27 ] <--- sp_after_call -+ // -32 [ saved f26 ] -+ // -31 [ saved f25 ] -+ // -30 [ saved f24 ] -+ // -29 [ saved f23 ] -+ // -28 [ saved f22 ] -+ // -27 [ saved f21 ] -+ // -26 [ saved f20 ] -+ // -25 [ saved f19 ] -+ // -24 [ saved f18 ] -+ // -23 [ saved f9 ] -+ // -22 [ saved f8 ] -+ // -21 [ saved x27 ] -+ // -20 [ saved x26 ] -+ // -19 [ saved x25 ] -+ // -18 [ saved x24 ] -+ // -17 [ saved x23 ] -+ // -16 [ saved x22 ] -+ // -15 [ saved x21 ] -+ // -14 [ saved x20 ] -+ // -13 [ saved x19 ] -+ // -12 [ saved x18 ] -+ // -11 [ saved x9 ] -+ // -10 [ call wrapper (x10) ] -+ // -9 [ result (x11) ] -+ // -8 [ result type (x12) ] -+ // -7 [ method (x13) ] -+ // -6 [ entry point (x14) ] -+ // -5 [ parameters (x15) ] -+ // -4 [ parameter size (x16) ] -+ // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] -+ // 0 [ ] <--- fp == saved sp (x2) ++ // SLOW PATH safepoint ++ { ++ __ block_comment("safepoint {"); ++ __ bind(safepoint_in_progress); + -+ // Call stub stack layout word offsets from fp -+ enum call_stub_layout { -+ sp_after_call_off = -33, ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ mv(c_rarg0, xthread); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ jalr(x1, t0, offset); + -+ f27_off = -33, -+ f26_off = -32, -+ f25_off = -31, -+ f24_off = -30, -+ f23_off = -29, -+ f22_off = -28, -+ f21_off = -27, -+ f20_off = -26, -+ f19_off = -25, -+ f18_off = -24, -+ f9_off = -23, -+ f8_off = -22, ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); + -+ x27_off = -21, -+ x26_off = -20, -+ x25_off = -19, -+ x24_off = -18, -+ x23_off = -17, -+ x22_off = -16, -+ x21_off = -15, -+ x20_off = -14, -+ x19_off = -13, -+ x18_off = -12, -+ x9_off = -11, ++ __ j(safepoint_in_progress_done); ++ __ block_comment("} safepoint"); ++ } + -+ call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, -+ }; ++ // SLOW PATH dtrace support ++ { ++ __ block_comment("dtrace entry {"); ++ __ bind(dtrace_method_entry); + -+ address generate_call_stub(address& return_address) { -+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && -+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, -+ "adjust this code"); ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
+ -+ StubCodeMark mark(this, "StubRoutines", "call_stub"); -+ address start = __ pc(); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ __ j(dtrace_method_entry_done); ++ __ block_comment("} dtrace entry"); ++ } + -+ const Address sp_after_call (fp, sp_after_call_off * wordSize); ++ { ++ __ block_comment("dtrace exit {"); ++ __ bind(dtrace_method_exit); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ j(dtrace_method_exit_done); ++ __ block_comment("} dtrace exit"); ++ } + -+ const Address call_wrapper (fp, call_wrapper_off * wordSize); -+ const Address result (fp, result_off * wordSize); -+ const Address result_type (fp, result_type_off * wordSize); -+ const Address method (fp, method_off * wordSize); -+ const Address entry_point (fp, entry_point_off * wordSize); -+ const Address parameters (fp, parameters_off * wordSize); -+ const Address parameter_size(fp, parameter_size_off * wordSize); ++ __ flush(); + -+ const Address thread (fp, thread_off * wordSize); ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ assert(nm != NULL, "create native nmethod fail!"); ++ return nm; ++} + -+ const Address f27_save (fp, f27_off * wordSize); -+ const Address f26_save (fp, f26_off * wordSize); -+ const Address f25_save (fp, f25_off * wordSize); -+ const Address f24_save (fp, f24_off * wordSize); -+ const Address f23_save (fp, f23_off * wordSize); -+ const Address f22_save (fp, f22_off * wordSize); -+ const Address f21_save (fp, f21_off * wordSize); -+ const Address f20_save (fp, f20_off * wordSize); -+ const Address f19_save (fp, f19_off * wordSize); -+ const Address f18_save (fp, f18_off * wordSize); -+ const Address f9_save (fp, f9_off * wordSize); -+ const Address f8_save (fp, f8_off * wordSize); ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ assert(callee_locals >= callee_parameters, ++ "test and remove; got more parms than locals"); ++ if (callee_locals < callee_parameters) { ++ return 0; // No adjustment for negative locals ++ } ++ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++ // diff is counted in stack words ++ return align_up(diff, 2); ++} + -+ const Address x27_save (fp, x27_off * wordSize); -+ const Address x26_save (fp, x26_off * wordSize); -+ const Address x25_save (fp, x25_off * wordSize); -+ const Address x24_save (fp, x24_off * wordSize); -+ const Address x23_save (fp, x23_off * wordSize); -+ const Address x22_save (fp, x22_off * wordSize); -+ const Address x21_save (fp, x21_off * wordSize); -+ const Address x20_save (fp, x20_off * wordSize); -+ const Address x19_save (fp, x19_off * wordSize); -+ const Address x18_save (fp, x18_off * wordSize); 
++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words = -1; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(masm != NULL && oop_maps != NULL); ++ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); + -+ const Address x9_save (fp, x9_off * wordSize); ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). ++ // x10: exception oop ++ // x9: exception handler ++ // x13: throwing pc ++ // So in this case we simply jam x13 into the useless return address and ++ // the stack looks just like we want. ++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; + -+ // stub code ++ // Prolog for non exception case! + -+ address riscv_entry = __ pc(); ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // set up frame and move sp to end of save area -+ __ enter(); -+ __ addi(sp, fp, sp_after_call_off * wordSize); ++ // Normal deoptimization. Save exec mode for unpack_frames. ++ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved ++ __ j(cont); + -+ // save register parameters and Java temporary/global registers -+ // n.b. 
we save thread even though it gets installed in -+ // xthread because we want to sanity check tp later -+ __ sd(c_rarg7, thread); -+ __ sw(c_rarg6, parameter_size); -+ __ sd(c_rarg5, parameters); -+ __ sd(c_rarg4, entry_point); -+ __ sd(c_rarg3, method); -+ __ sd(c_rarg2, result_type); -+ __ sd(c_rarg1, result); -+ __ sd(c_rarg0, call_wrapper); ++ int reexecute_offset = __ pc() - start; + -+ __ sd(x9, x9_save); ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at + -+ __ sd(x18, x18_save); -+ __ sd(x19, x19_save); -+ __ sd(x20, x20_save); -+ __ sd(x21, x21_save); -+ __ sd(x22, x22_save); -+ __ sd(x23, x23_save); -+ __ sd(x24, x24_save); -+ __ sd(x25, x25_save); -+ __ sd(x26, x26_save); -+ __ sd(x27, x27_save); ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ __ fsd(f8, f8_save); -+ __ fsd(f9, f9_save); -+ __ fsd(f18, f18_save); -+ __ fsd(f19, f19_save); -+ __ fsd(f20, f20_save); -+ __ fsd(f21, f21_save); -+ __ fsd(f22, f22_save); -+ __ fsd(f23, f23_save); -+ __ fsd(f24, f24_save); -+ __ fsd(f25, f25_save); -+ __ fsd(f26, f26_save); -+ __ fsd(f27, f27_save); ++ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved ++ __ j(cont); + -+ // install Java thread in global register now we have saved -+ // whatever value it held -+ __ mv(xthread, c_rarg7); ++ int exception_offset = __ pc() - start; + -+ // And method -+ __ mv(xmethod, c_rarg3); ++ // Prolog for exception case + -+ // set up the heapbase register -+ __ reinit_heapbase(); ++ // all registers are dead at this entry point, except for x10, and ++ // x13 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. + -+#ifdef ASSERT -+ // make sure we have no pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("StubRoutines::call_stub: entered with pending exception"); -+ __ BIND(L); -+ } -+#endif -+ // pass parameters if any -+ __ mv(esp, sp); -+ __ slli(t0, c_rarg6, LogBytesPerWord); -+ __ sub(t0, sp, t0); // Move SP out of the way -+ __ andi(sp, t0, -2 * wordSize); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); + -+ BLOCK_COMMENT("pass parameters if any"); -+ Label parameters_done; -+ // parameter count is still in c_rarg6 -+ // and parameter pointer identifying param 1 is in c_rarg5 -+ __ beqz(c_rarg6, parameters_done); ++ int exception_in_tls_offset = __ pc() - start; + -+ address loop = __ pc(); -+ __ ld(t0, c_rarg5, 0); -+ __ addi(c_rarg5, c_rarg5, wordSize); -+ __ addi(c_rarg6, c_rarg6, -1); -+ __ push_reg(t0); -+ __ bgtz(c_rarg6, loop); ++ // new implementation because exception oop is now passed in JavaThread + -+ __ BIND(parameters_done); ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. 
only ++ // args are on the stack, no return address) + -+ // call Java entry -- passing methdoOop, and current sp -+ // xmethod: Method* -+ // x30: sender sp -+ BLOCK_COMMENT("call Java function"); -+ __ mv(x30, sp); -+ __ jalr(c_rarg4); ++ // The return address pushed by save_live_registers will be patched ++ // later with the throwing pc. The correct value is not available ++ // now because loading it from memory would destroy registers. + -+ // save current address for use by exception handling code ++ // NB: The SP at this point must be the SP of the method that is ++ // being deoptimized. Deoptimization assumes that the frame created ++ // here by save_live_registers is immediately below the method's SP. ++ // This is a somewhat fragile mechanism. + -+ return_address = __ pc(); ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // store result depending on type (everything that is not -+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) -+ // n.b. this assumes Java returns an integral result in x10 -+ // and a floating result in j_farg0 -+ __ ld(j_rarg2, result); -+ Label is_long, is_float, is_double, exit; -+ __ ld(j_rarg1, result_type); -+ __ mv(t0, (u1)T_OBJECT); -+ __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_LONG); -+ __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_FLOAT); -+ __ beq(j_rarg1, t0, is_float); -+ __ mv(t0, (u1)T_DOUBLE); -+ __ beq(j_rarg1, t0, is_double); ++ // Now it is safe to overwrite any register + -+ // handle T_INT case -+ __ sw(x10, Address(j_rarg2)); ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + -+ __ BIND(exit); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. 
Then clear the field in JavaThread + -+ // pop parameters -+ __ addi(esp, fp, sp_after_call_off * wordSize); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + +#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ BIND(S); -+ __ stop("StubRoutines::call_stub: threads must correspond"); -+ __ BIND(L); -+ } -+#endif -+ -+ // restore callee-save registers -+ __ fld(f27, f27_save); -+ __ fld(f26, f26_save); -+ __ fld(f25, f25_save); -+ __ fld(f24, f24_save); -+ __ fld(f23, f23_save); -+ __ fld(f22, f22_save); -+ __ fld(f21, f21_save); -+ __ fld(f20, f20_save); -+ __ fld(f19, f19_save); -+ __ fld(f18, f18_save); -+ __ fld(f9, f9_save); -+ __ fld(f8, f8_save); -+ -+ __ ld(x27, x27_save); -+ __ ld(x26, x26_save); -+ __ ld(x25, x25_save); -+ __ ld(x24, x24_save); -+ __ ld(x23, x23_save); -+ __ ld(x22, x22_save); -+ __ ld(x21, x21_save); -+ __ ld(x20, x20_save); -+ __ ld(x19, x19_save); -+ __ ld(x18, x18_save); ++ // verify that there is really an exception oop in JavaThread ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(x10); + -+ __ ld(x9, x9_save); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif + -+ __ ld(c_rarg0, call_wrapper); -+ __ ld(c_rarg1, result); -+ __ ld(c_rarg2, result_type); -+ __ ld(c_rarg3, method); -+ __ ld(c_rarg4, entry_point); -+ __ ld(c_rarg5, parameters); -+ __ ld(c_rarg6, parameter_size); -+ __ ld(c_rarg7, thread); ++ __ bind(cont); + -+ // leave frame and return to caller -+ __ leave(); -+ __ ret(); ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++ // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) + -+ // handle return types different from T_INT ++ // fetch_unroll_info needs to call last_java_frame(). + -+ __ BIND(is_long); -+ __ sd(x10, Address(j_rarg2, 0)); -+ __ j(exit); ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, ++ JavaThread::last_Java_fp_offset())); ++ __ beqz(t0, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ __ BIND(is_float); -+ __ fsw(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ __ BIND(is_double); -+ __ fsd(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); ++ __ reset_last_Java_frame(false); + -+ return start; -+ } ++ // Load UnrollBlock* into x15 ++ __ mv(x15, x10); + -+ // Return point for a Java call if there's an exception thrown in -+ // Java code. 
The exception is caught and transformed into a -+ // pending exception stored in JavaThread that can be tested from -+ // within the VM. -+ // -+ // Note: Usually the parameters are removed by the callee. In case -+ // of an exception crossing an activation frame boundary, that is -+ // not the case if the callee is compiled code => need to setup the -+ // sp. -+ // -+ // x10: exception oop ++ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ li(t0, Deoptimization::Unpack_exception); ++ __ bne(xcpool, t0, noException); // Was exception pending? ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ address generate_catch_exception() { -+ StubCodeMark mark(this, "StubRoutines", "catch_exception"); -+ address start = __ pc(); ++ __ verify_oop(x10); + -+ // same as in generate_call_stub(): -+ const Address thread(fp, thread_off * wordSize); ++ // Overwrite the result registers with the exception results. ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ bind(S); -+ __ stop("StubRoutines::catch_exception: threads must correspond"); -+ __ bind(L); -+ } -+#endif ++ __ bind(noException); + -+ // set pending exception -+ __ verify_oop(x10); ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. + -+ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ mv(t0, (address)__FILE__); -+ __ sd(t0, Address(xthread, Thread::exception_file_offset())); -+ __ mv(t0, (int)__LINE__); -+ __ sw(t0, Address(xthread, Thread::exception_line_offset())); ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // complete return to VM -+ assert(StubRoutines::_call_stub_return_address != NULL, -+ "_call_stub_return_address must have been generated before"); -+ __ j(StubRoutines::_call_stub_return_address); ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + -+ return start; -+ } ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. + -+ // Continuation point for runtime calls returning with a pending -+ // exception. The pending exception check happened in the runtime -+ // or native call stub. The pending exception in Thread is -+ // converted into a Java-level exception. ++ // Pop all the frames we must move/replace. + // -+ // Contract with Java-level exception handlers: -+ // x10: exception -+ // x13: throwing pc ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + // -+ // NOTE: At entry of this stub, exception-pc must be in RA !! 
-+ -+ // NOTE: this is always used as a jump target within generated code -+ // so it just needs to be generated code with no x86 prolog -+ -+ address generate_forward_exception() { -+ StubCodeMark mark(this, "StubRoutines", "forward exception"); -+ address start = __ pc(); ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. + -+ // Upon entry, RA points to the return address returning into -+ // Java (interpreted or compiled) code; i.e., the return address -+ // becomes the throwing pc. -+ // -+ // Arguments pushed before the runtime call are still on the stack -+ // but the exception handler will reset the stack pointer -> -+ // ignore them. A potential result in registers can be ignored as -+ // well. ++ // Pop deoptimized frame ++ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, Address(sp, 0)); ++ __ ld(ra, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) + +#ifdef ASSERT -+ // make sure this code is only executed if there is a pending exception -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ stop("StubRoutines::forward exception: no pending exception (1)"); -+ __ bind(L); -+ } ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); +#endif ++ // Load address of array of frame pcs into x12 ++ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ // compute exception handler into x9 -+ -+ // call the VM to find the handler address associated with the -+ // caller address. pass thread in x10 and caller pc (ret address) -+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on -+ // the stack. -+ __ mv(c_rarg1, ra); -+ // ra will be trashed by the VM call so we move it to x9 -+ // (callee-saved) because we also need to pass it to the handler -+ // returned by this call. -+ __ mv(x9, ra); -+ BLOCK_COMMENT("call exception_handler_for_return_address"); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, c_rarg1); -+ // we should not really care that ra is no longer the callee -+ // address. we saved the value the handler needs in x9 so we can -+ // just copy it to x13. however, the C2 handler will push its own -+ // frame and then calls into the VM and the VM code asserts that -+ // the PC for the frame above the handler belongs to a compiled -+ // Java method. So, we restore ra here to satisfy that assert. 
-+ __ mv(ra, x9); -+ // setup x10 & x13 & clear pending exception -+ __ mv(x13, x9); -+ __ mv(x9, x10); -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ // Load address of array of frame sizes into x14 ++ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + -+#ifdef ASSERT -+ // make sure exception is set -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("StubRoutines::forward exception: no pending exception (2)"); -+ __ bind(L); -+ } -+#endif ++ // Load counter into x13 ++ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + -+ // continue at exception handler -+ // x10: exception -+ // x13: throwing pc -+ // x9: exception handler -+ __ verify_oop(x10); -+ __ jr(x9); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. + -+ return start; -+ } ++ const Register sender_sp = x16; + -+ // Non-destructive plausibility checks for oops -+ // -+ // Arguments: -+ // x10: oop to verify -+ // t0: error message -+ // -+ // Stack after saving c_rarg3: -+ // [tos + 0]: saved c_rarg3 -+ // [tos + 1]: saved c_rarg2 -+ // [tos + 2]: saved ra -+ // [tos + 3]: saved t1 -+ // [tos + 4]: saved x10 -+ // [tos + 5]: saved t0 -+ address generate_verify_oop() { ++ __ mv(sender_sp, sp); ++ __ lwu(x9, Address(x15, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ sub(sp, sp, x9); + -+ StubCodeMark mark(this, "StubRoutines", "verify_oop"); -+ address start = __ pc(); ++ // Push interpreter frames in a loop ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern ++ __ mv(t1, t0); ++ Label loop; ++ __ bind(loop); ++ __ ld(x9, Address(x14, 0)); // Load frame size ++ __ addi(x14, x14, wordSize); ++ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Load pc ++ __ addi(x12, x12, wordSize); ++ __ enter(); // Save old & set new fp ++ __ sub(sp, sp, x9); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ addi(x13, x13, -1); // Decrement counter ++ __ bnez(x13, loop); + -+ Label exit, error; ++ // Re-push self-frame ++ __ ld(ra, Address(x12)); ++ __ enter(); + -+ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 ++ // Allocate a full sized register save area. We subtract 2 because ++ // enter() just pushed 2 words ++ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); + -+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); -+ __ ld(c_rarg3, Address(c_rarg2)); -+ __ add(c_rarg3, c_rarg3, 1); -+ __ sd(c_rarg3, Address(c_rarg2)); ++ // Restore frame locals after moving the frame ++ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // object is in x10 -+ // make sure object is 'reasonable' -+ __ beqz(x10, exit); // if obj is NULL it is OK ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. 
Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + -+ // Check if the oop is in the right area of memory -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ // Use fp because the frames look interpreted now ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // Compare c_rarg2 and c_rarg3 -+ __ bne(c_rarg2, c_rarg3, error); ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); // second arg: exec_mode ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // make sure klass is 'reasonable', which is not zero. -+ __ load_klass(x10, x10); // get klass -+ __ beqz(x10, error); // if klass is NULL it is broken ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap(frame_size_in_words, 0)); + -+ // return if everything seems ok -+ __ bind(exit); ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 -+ __ ret(); ++ // Collect return values ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // handle errors -+ __ bind(error); -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ // Pop self-frame. ++ __ leave(); // Epilog + -+ __ push_reg(RegSet::range(x0, x31), sp); -+ // prepare parameters for debug64, c_rarg0: address of error message, -+ // c_rarg1: return address, c_rarg2: address of regs on stack -+ __ mv(c_rarg0, t0); // pass address of error message -+ __ mv(c_rarg1, ra); // pass return address -+ __ mv(c_rarg2, sp); // pass address of regs on stack -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ BLOCK_COMMENT("call MacroAssembler::debug"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); -+ __ jalr(x1, t0, offset); ++ // Jump to interpreter ++ __ ret(); + -+ return start; -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ // The inner part of zero_words(). -+ // -+ // Inputs: -+ // x28: the HeapWord-aligned base address of an array to zero. -+ // x29: the count in HeapWords, x29 > 0. -+ // -+ // Returns x28 and x29, adjusted for the caller to clear. -+ // x28: the base address of the tail of words left to clear. -+ // x29: the number of words in the tail. -+ // x29 < MacroAssembler::zero_words_block_size. ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} + -+ address generate_zero_blocks() { -+ Label done; ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. ++// RISCV needs two words for RA (return address) and FP (frame pointer). 
++uint SharedRuntime::in_preserve_stack_slots() { ++ return 2 * VMRegImpl::slots_per_word; ++} + -+ const Register base = x28, cnt = x29; ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); -+ address start = __ pc(); ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ { -+ // Clear the remaining blocks. -+ Label loop; -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bltz(cnt, done); -+ __ bind(loop); -+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { -+ __ sd(zr, Address(base, 0)); -+ __ add(base, base, 8); -+ } -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bgez(cnt, loop); -+ __ bind(done); -+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); -+ } ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ __ ret(); ++ address start = __ pc(); + -+ return start; -+ } ++ // Push self-frame. We get here with a return address in RA ++ // and sp should be 16 byte aligned ++ // push fp and retaddr by hand ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp, 0)); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ addiw(c_rarg1, j_rarg0, 0); + -+ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset); ++ // we need to set the past SP to the stack pointer of the stub frame ++ // and the pc to the address where this runtime call will return ++ // although actually any pc in this code blob will do). ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ void copy_by_step(RegSet tmp_regs, Register src, Register dst, -+ unsigned unroll_factor, int unit) { -+ unsigned char regs[32]; -+ int offset = unit < 0 ? unit : 0; ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) ++ // ++ // n.b. 
3 gp args, 0 fp args, integral return type + -+ // Scan bitset to get tmp regs -+ unsigned int regsSize = 0; -+ unsigned bitset = tmp_regs.bits(); ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ int32_t offset = 0; ++ __ la_patchable(t0, ++ RuntimeAddress(CAST_FROM_FN_PTR(address, ++ Deoptimization::uncommon_trap)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ assert(((bitset & (1 << (src->encoding()))) == 0), "src should not in tmp regs"); -+ assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not in tmp regs"); ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ assert_cond(oop_maps != NULL && map != NULL); + -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[regsSize++] = reg; -+ } -+ bitset <<= 1; -+ } ++ // location of fp is known implicitly by the frame sender code + -+ copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (abs(unit)) { -+ case 1 : -+ ld_arr = (copy_insn)&MacroAssembler::lbu; -+ st_arr = (copy_insn)&MacroAssembler::sb; -+ break; -+ case BytesPerShort : -+ ld_arr = (copy_insn)&MacroAssembler::lhu; -+ st_arr = (copy_insn)&MacroAssembler::sh; -+ break; -+ case BytesPerInt : -+ ld_arr = (copy_insn)&MacroAssembler::lwu; -+ st_arr = (copy_insn)&MacroAssembler::sw; -+ break; -+ case BytesPerLong : -+ ld_arr = (copy_insn)&MacroAssembler::ld; -+ st_arr = (copy_insn)&MacroAssembler::sd; -+ break; -+ default : -+ ShouldNotReachHere(); -+ } ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ for (unsigned i = 0; i < unroll_factor; i++) { -+ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset); -+ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset); -+ } ++ __ reset_last_Java_frame(false); + -+ __ addi(src, src, unroll_factor * unit); -+ __ addi(dst, dst, unroll_factor * unit); -+ } ++ // move UnrollBlock* into x14 ++ __ mv(x14, x10); + -+ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int ele_size, unsigned align_unit) { -+ bool is_backwards = ele_size < 0; -+ size_t granularity = uabs(ele_size); -+ for (unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) { -+ int offset = is_backwards ? (int)(-unit) : unit; -+ Label exit; -+ __ andi(tmp, count_in_bytes, unit); -+ __ beqz(tmp, exit); -+ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset); -+ __ bind(exit); -+ } ++#ifdef ASSERT ++ { Label L; ++ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); ++ __ beq(t0, t1, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); + } ++#endif + -+ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int step, Label *Lcopy_small, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ RegSet tmp_regs = RegSet::range(x13, x16); -+ assert_different_registers(src, dst, count_in_bytes, tmp); -+ -+ Label loop, copy2, copy1, finish; -+ if (loopsize == noreg) { -+ loopsize = t1; -+ __ mv(loopsize, 8 * granularity); -+ } -+ -+ // Cyclic copy with 8*step. 
-+ __ bind(loop); -+ { -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step); -+ __ sub(count_in_bytes, count_in_bytes, 8 * granularity); -+ __ bge(count_in_bytes, loopsize, loop); -+ } -+ -+ if (Lcopy_small != NULL) { -+ __ bind(*Lcopy_small); -+ } ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + -+ // copy memory smaller than step * 8 bytes -+ __ andi(tmp, count_in_bytes, granularity << 2); -+ __ beqz(tmp, copy2); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step); ++ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! + -+ __ bind(copy2); -+ __ andi(tmp, count_in_bytes, granularity << 1); -+ __ beqz(tmp, copy1); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step); ++ // Pop deoptimized frame (int) ++ __ lwu(x12, Address(x14, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, sp, 0); ++ __ ld(ra, sp, wordSize); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) frame + -+ __ bind(copy1); -+ __ andi(tmp, count_in_bytes, granularity); -+ __ beqz(tmp, finish); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step); ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); ++#endif + -+ __ bind(finish); -+ } ++ // Load address of array of frame pcs into x12 (address*) ++ __ ld(x12, Address(x14, ++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ // Cyclic copy with one step. -+ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ Label loop1; -+ if (loopsize == noreg) { -+ loopsize = t0; -+ __ mv(loopsize, granularity); -+ } ++ // Load address of array of frame sizes into x15 (intptr_t*) ++ __ ld(x15, Address(x14, ++ Deoptimization::UnrollBlock:: ++ frame_sizes_offset_in_bytes())); + -+ __ bind(loop1); -+ { -+ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step); -+ __ sub(count_in_bytes, count_in_bytes, granularity); -+ __ bge(count_in_bytes, loopsize, loop1); -+ } -+ } -+ -+ void align_unit(Register src, Register dst, Register count_in_bytes, -+ unsigned unit, bool is_backwards) { -+ Label skip; -+ __ andi(t0, dst, unit); -+ __ beqz(t0, skip); -+ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? -unit : unit); -+ __ sub(count_in_bytes, count_in_bytes, unit); -+ __ bind(skip); -+ } -+ -+ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements, -+ Register tmp, int ele_step) { -+ -+ bool is_backwards = ele_step < 0; -+ unsigned int granularity = uabs(ele_step); -+ Label Lcopy_small, Ldone, Lcopy_ele, Laligned; -+ const Register count_in_bytes = x31, src = x28, dst = x29; -+ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1); -+ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity)); -+ __ add(src, s, is_backwards ? count_in_bytes : zr); -+ __ add(dst, d, is_backwards ? 
count_in_bytes : zr); -+ -+ // if count_in_elements < 8, copy_small -+ __ mv(t0, 8); -+ if (is_align && granularity < BytesPerLong) { -+ __ blt(count_in_bytes, t0, Lcopy_small); -+ } else { -+ __ blt(count_in_elements, t0, Lcopy_small); -+ } -+ -+ if (granularity < BytesPerLong) { -+ Label Lcopy_aligned[3]; -+ Label Lalign8; -+ if (!is_align) { -+ Label Lalign_and_copy; -+ __ mv(t0, EagerArrayCopyThreshold); -+ __ blt(count_in_bytes, t0, Lalign_and_copy); -+ // Align dst to 8. -+ for (unsigned unit = granularity; unit <= 4; unit <<= 1) { -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } ++ // Counter ++ __ lwu(x13, Address(x14, ++ Deoptimization::UnrollBlock:: ++ number_of_frames_offset_in_bytes())); // (int) + -+ Register shr = x30, shl = x7, tmp1 = x13; ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. + -+ __ andi(shr, src, 0x7); -+ __ beqz(shr, Lalign8); -+ { -+ // calculaute the shift for store doubleword -+ __ slli(shr, shr, 3); -+ __ sub(shl, shr, 64); -+ __ sub(shl, zr, shl); -+ -+ // alsrc: previous position of src octal alignment -+ Register alsrc = t1; -+ __ andi(alsrc, src, -8); -+ -+ // move src to tail -+ __ andi(t0, count_in_bytes, -8); -+ if (is_backwards) { -+ __ sub(src, src, t0); -+ } else { -+ __ add(src, src, t0); -+ } ++ const Register sender_sp = t1; // temporary register + -+ // prepare for copy_dstaligned_loop -+ __ ld(tmp1, alsrc, 0); -+ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards); -+ __ mv(x17, 8); -+ __ blt(count_in_bytes, x17, Lcopy_small); -+ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards); -+ __ j(Lcopy_small); -+ } -+ __ j(Ldone); -+ __ bind(Lalign_and_copy); -+ -+ // Check src and dst could be 8/4/2 algined at the same time. If could, align the -+ // memory and copy by 8/4/2. -+ __ xorr(t1, src, dst); -+ -+ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) { -+ Label skip; -+ unsigned int unit = alignment >> 1; -+ // Check src and dst could be aligned to checkbyte at the same time -+ // if copy from src to dst. If couldn't, jump to label not_aligned. -+ __ andi(t0, t1, alignment - 1); -+ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]); -+ // Align src and dst to unit. -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ } -+ __ bind(Lalign8); -+ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) { -+ // Copy memory by steps, which has been aligned to step_size. -+ Label loop8, Ltail; -+ int step = is_backwards ? (-step_size) : step_size; -+ if (!(step_size == 8 && is_align)) { // which has load 8 to t0 before -+ // Check whether the memory size is smaller than step_size. -+ __ mv(t0, step_size); -+ __ blt(count_in_bytes, t0, Ltail); -+ } -+ const Register eight_step = t1; -+ __ mv(eight_step, step_size * 8); -+ __ bge(count_in_bytes, eight_step, loop8); -+ // If memory is less than 8*step_size bytes, loop by step. -+ copy_loop1(src, dst, count_in_bytes, step, t0); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(loop8); -+ // If memory is greater than or equal to 8*step_size bytes, loop by step*8. 
-+ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step); -+ __ bind(Ltail); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]); -+ } -+ } -+ // If the ele_step is greater than 8, or the memory src and dst cannot -+ // be aligned with a number greater than the value of step. -+ // Cyclic copy with 8*ele_step. -+ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg); ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); // (int) ++ __ mv(sender_sp, sp); ++ __ sub(sp, sp, x11); + -+ __ bind(Ldone); -+ } ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(x11, Address(x15, 0)); // Load frame size ++ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Save return address ++ __ enter(); // and old fp & set new fp ++ __ sub(sp, sp, x11); // Prolog ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ add(x15, x15, wordSize); // Bump array pointer (sizes) ++ __ add(x12, x12, wordSize); // Bump array pointer (pcs) ++ __ subw(x13, x13, 1); // Decrement counter ++ __ bgtz(x13, loop); ++ __ ld(ra, Address(x12, 0)); // save final return address ++ // Re-push self-frame ++ __ enter(); // & old fp & set new fp + -+ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17; -+ const Register sll_reg1 = is_backwards ? tmp1 : tmp2, -+ srl_reg1 = is_backwards ? tmp2 : tmp1, -+ sll_reg2 = is_backwards ? tmp2 : tmp3, -+ srl_reg2 = is_backwards ? tmp3 : tmp2, -+ sll_reg3 = is_backwards ? tmp3 : tmp4, -+ srl_reg3 = is_backwards ? tmp4 : tmp3, -+ sll_reg4 = is_backwards ? tmp4 : tmp1, -+ srl_reg4 = is_backwards ? tmp1 : tmp4; -+ assert_different_registers(t0, thirty_two, alsrc, shr, shl); -+ int unit = is_backwards ? -wordSize : wordSize; -+ int offset = is_backwards ? -wordSize : 0; -+ Label loop, done; -+ -+ __ mv(thirty_two, 32); -+ __ blt(count_in_bytes, thirty_two, done); ++ // Use fp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. 
++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg1, shl); -+ __ srl(tmp1, srl_reg1, shr); -+ __ orr(tmp1, tmp1, t0); -+ __ sd(tmp1, dst, offset); -+ -+ __ ld(tmp3, alsrc, unit * 2); -+ __ sll(t0, sll_reg2, shl); -+ __ srl(tmp2, srl_reg2, shr); -+ __ orr(tmp2, tmp2, t0); -+ __ sd(tmp2, dst, unit + offset); -+ -+ __ ld(tmp4, alsrc, unit * 3); -+ __ sll(t0, sll_reg3, shl); -+ __ srl(tmp3, srl_reg3, shr); -+ __ orr(tmp3, tmp3, t0); -+ __ sd(tmp3, dst, unit * 2 + offset); -+ -+ __ ld(tmp1, alsrc, unit * 4); -+ __ sll(t0, sll_reg4, shl); -+ __ srl(tmp4, srl_reg4, shr); -+ __ orr(tmp4, tmp4, t0); -+ __ sd(tmp4, dst, unit * 3 + offset); -+ -+ __ add(alsrc, alsrc, unit * 4); -+ __ add(dst, dst, unit * 4); -+ __ sub(count_in_bytes, count_in_bytes, 32); -+ __ bge(count_in_bytes, thirty_two, loop); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // + -+ __ bind(done); -+ } ++ // n.b. 2 gp args, 0 fp args, integral return type + -+ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, Register eight, -+ bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16; -+ const Register sll_reg = is_backwards ? tmp1 : tmp2, -+ srl_reg = is_backwards ? tmp2 : tmp1; -+ assert_different_registers(t0, eight, alsrc, shr, shl); -+ Label loop; -+ int unit = is_backwards ? -wordSize : wordSize; ++ // sp should already be aligned ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg, shl); -+ __ srl(tmp1, srl_reg, shr); -+ __ orr(t0, tmp1, t0); -+ __ sd(t0, dst, is_backwards ? unit : 0); -+ __ mv(tmp1, tmp2); -+ __ add(alsrc, alsrc, unit); -+ __ add(dst, dst, unit); -+ __ sub(count_in_bytes, count_in_bytes, 8); -+ __ bge(count_in_bytes, eight, loop); -+ } ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // Scan over array at a for count oops, verifying each one. -+ // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(int size, Register a, Register count, Register temp) { -+ Label loop, end; -+ __ mv(t1, zr); -+ __ slli(t0, count, exact_log2(size)); -+ __ bind(loop); -+ __ bgeu(t1, t0, end); ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ __ add(temp, a, t1); -+ if (size == wordSize) { -+ __ ld(temp, Address(temp, 0)); -+ __ verify_oop(temp); -+ } else { -+ __ lwu(temp, Address(temp, 0)); -+ __ decode_heap_oop(temp); // calls verify_oop -+ } -+ __ add(t1, t1, size); -+ __ j(loop); -+ __ bind(end); -+ } ++ // Pop self-frame. 
++ __ leave(); // Epilog + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). -+ // -+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_reg = RegSet::of(s, d, count); -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++ // Jump to interpreter ++ __ ret(); + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } -+ copy_memory(aligned, s, d, count, t0, checked_cast(size)); ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(checked_cast(size), d, count, t2); -+ } -+ } ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words = -1; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg); ++ // Save Integer and Float registers. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. 
++ ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, x18 is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_regs = RegSet::of(s, d, count); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++ // Do the call ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
+ -+ // use fwd copy when (d-s) above_equal (count*size) -+ __ sub(t0, d, s); -+ __ slli(t1, count, exact_log2(size)); -+ __ bgeu(t0, t1, nooverlap_target); ++ oop_maps->add_gc_map( __ pc() - start, map); + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ Label noException; + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); ++ __ reset_last_Java_frame(false); + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ copy_memory(aligned, s, d, count, t0, -size); -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs); -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; -+ } ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, noException); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_byte_copy(). -+ // -+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); -+ } ++ // Exception pending + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. 
-+ // -+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ reg_saver.restore_live_registers(masm); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_short_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_short_copy(). -+ // -+ address generate_disjoint_short_copy(bool aligned, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); -+ } ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // No exception case ++ __ bind(noException); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). 
-+ // -+ address generate_disjoint_int_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); -+ } ++ Label no_adjust, bail; ++ if (!cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); ++ __ bne(x18, t0, no_adjust); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_lwu_to_zr() ++ __ lwu(t0, Address(x18)); ++ __ andi(t1, t0, 0b0000011); ++ __ mv(t2, 0b0000011); ++ __ bne(t1, t2, bail); // 0-6:0b0000011 ++ __ srli(t1, t0, 7); ++ __ andi(t1, t1, 0b00000); ++ __ bnez(t1, bail); // 7-11:0b00000 ++ __ srli(t1, t0, 12); ++ __ andi(t1, t1, 0b110); ++ __ mv(t2, 0b110); ++ __ bne(t1, t2, bail); // 12-14:0b110 ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ add(x18, x18, NativeInstruction::instruction_size); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). 
-+ // -+ address generate_disjoint_long_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); -+ } ++ reg_saver.restore_live_registers(masm); ++ __ ret(); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_long_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); -+ } ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). -+ // -+ address generate_disjoint_oop_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_oop_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, -+ name, dest_uninitialized); -+ } -+ -+ // Helper for generating a dynamic type check. -+ // Smashes t0, t1. -+ void generate_type_check(Register sub_klass, -+ Register super_check_offset, -+ Register super_klass, -+ Label& L_success) { -+ assert_different_registers(sub_klass, super_check_offset, super_klass); -+ -+ BLOCK_COMMENT("type_check:"); -+ -+ Label L_miss; -+ -+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); -+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); -+ -+ // Fall through on failure! 
-+ __ BIND(L_miss); -+ } -+ -+ // -+ // Generate checkcasting array copy stub -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // c_rarg3 - size_t ckoff (super_check_offset) -+ // c_rarg4 - oop ckval (super_klass) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_checkcast_copy(const char* name, address* entry, -+ bool dest_uninitialized = false) { -+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; -+ -+ // Input registers (after setup_arg_regs) -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elementscount -+ const Register ckoff = c_rarg3; // super_check_offset -+ const Register ckval = c_rarg4; // super_klass ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} + -+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); -+ RegSet wb_post_saved_regs = RegSet::of(count); ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + -+ // Registers used as temps (x7, x9, x18 are save-on-entry) -+ const Register count_save = x19; // orig elementscount -+ const Register start_to = x18; // destination array start address -+ const Register copied_oop = x7; // actual oop copied -+ const Register r9_klass = x9; // oop._klass ++ // allocate space for the code ++ ResourceMark rm; + -+ //--------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the two arrays are subtypes of Object[] but the -+ // destination array type is not equal to or a supertype -+ // of the source type. Each element must be separately -+ // checked. 
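The comment block above states when the checkcast copy path is taken (destination element type is not a supertype of the source element type, so every element is checked individually) and that the stub returns 0 on success or -1^K on a partial transfer. As a rough C-level model of that contract only (illustrative, not taken from the patch; the hypothetical can_store callback stands in for the subtype check the stub emits via generate_type_check):

#include <cstddef>

typedef bool (*can_store_fn)(const void* elem, const void* dst_elem_klass);

// Returns 0 on a full transfer, or ~K (bitwise NOT of the number of elements
// already copied) when element K fails the store check -- the same encoding
// the stub leaves in x10.
ptrdiff_t checkcast_copy_model(void* const* from, void** to, size_t count,
                               const void* dst_elem_klass, can_store_fn can_store) {
  for (size_t k = 0; k < count; k++) {
    void* elem = from[k];
    if (elem != NULL && !can_store(elem, dst_elem_klass)) {
      return ~(ptrdiff_t)k;   // K = k elements were copied before the failure
    }
    to[k] = elem;             // a NULL element always passes the store check
  }
  return 0;                   // full transfer
}

A caller that sees a negative result can recover the partial count as ~result, which matches the Java-level behaviour of System.arraycopy: the leading compatible elements are copied before ArrayStoreException is raised.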
++ CodeBuffer buffer(name, 1000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ assert_different_registers(from, to, count, ckoff, ckval, start_to, -+ copied_oop, r9_klass, count_save); ++ int frame_size_in_words = -1; ++ RegisterSaver reg_saver(false /* save_vectors */); + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ int start = __ offset(); + -+ // Caller of this entry point must set up the argument registers -+ if (entry != NULL) { -+ *entry = __ pc(); -+ BLOCK_COMMENT("Entry:"); -+ } ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // Empty array: Nothing to do -+ __ beqz(count, L_done); ++ int frame_complete = __ offset(); + -+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ { ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+#ifdef ASSERT -+ BLOCK_COMMENT("assert consistent ckoff/ckval"); -+ // The ckoff and ckval must be mutually consistent, -+ // even though caller generates both. -+ { Label L; -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(start_to, Address(ckval, sco_offset)); -+ __ beq(ckoff, start_to, L); -+ __ stop("super_check_offset inconsistent"); -+ __ bind(L); -+ } -+#endif //ASSERT ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(destination), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ } + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; -+ bool is_oop = true; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ oop_maps->add_gc_map( __ offset() - start, map); + -+ // save the original count -+ __ mv(count_save, count); ++ // x10 contains the address we are going to jump to assuming no exception got installed + -+ // Copy from low to high addresses -+ __ mv(start_to, to); // Save destination array start address -+ __ j(L_load_element); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, pending); + -+ // ======== begin loop ======== -+ // (Loop is rotated; its entry is L_load_element.) -+ // Loop control: -+ // for count to 0 do -+ // copied_oop = load_heap_oop(from++) -+ // ... generate_type_check ... -+ // store_heap_oop(to++, copied_oop) -+ // end ++ // get the returned Method* ++ __ get_vm_result_2(xmethod, xthread); ++ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); + -+ __ align(OptoLoopAlignment); ++ // x10 is where we want to jump, overwrite t0 which is saved and temporary ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); ++ reg_saver.restore_live_registers(masm); + -+ __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop -+ __ add(to, to, UseCompressedOops ? 
4 : 8); -+ __ sub(count, count, 1); -+ __ beqz(count, L_do_card_marks); ++ // We are back the the original state on entry and ready to go. + -+ // ======== loop entry is here ======== -+ __ BIND(L_load_element); -+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop -+ __ add(from, from, UseCompressedOops ? 4 : 8); -+ __ beqz(copied_oop, L_store_element); ++ __ jr(t0); + -+ __ load_klass(r9_klass, copied_oop);// query the object klass -+ generate_type_check(r9_klass, ckoff, ckval, L_store_element); -+ // ======== end loop ======== ++ // Pending exception after the safepoint + -+ // It was a real error; we must depend on the caller to finish the job. -+ // Register count = remaining oops, count_orig = total oops. -+ // Emit GC store barriers for the oops we have copied and report -+ // their number to the caller. ++ __ bind(pending); + -+ __ sub(count, count_save, count); // K = partially copied oop count -+ __ xori(count, count, -1); // report (-1^K) to caller -+ __ beqz(count, L_done_pop); ++ reg_saver.restore_live_registers(masm); + -+ __ BIND(L_do_card_marks); -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); ++ // exception pending => remove activation and forward to exception handler + -+ __ bind(L_done_pop); -+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); -+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + -+ __ bind(L_done); -+ __ mv(x10, count); -+ __ leave(); -+ __ ret(); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ return start; -+ } ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); + -+ // Perform range checks on the proposed arraycopy. -+ // Kills temp, but nothing else. -+ // Also, clean the sign bits of src_pos and dst_pos. -+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) -+ Register src_pos, // source position (c_rarg1) -+ Register dst, // destination array oo (c_rarg2) -+ Register dst_pos, // destination position (c_rarg3) -+ Register length, -+ Register temp, -+ Label& L_failed) { -+ BLOCK_COMMENT("arraycopy_range_checks:"); ++ // return the blob ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); ++} + -+ assert_different_registers(t0, temp); ++#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} + -+ // if [src_pos + length > arrayOop(src)->length()] then FAIL -+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, src_pos); -+ __ bgtu(temp, t0, L_failed); ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in riscv.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. 
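The runtime blobs above reach VM entry points with la_patchable plus jalr instead of an absolute jump. Assuming la_patchable emits the usual auipc-based pc-relative sequence (an assumption about its implementation, not something shown in this hunk), the pc-relative distance has to be split into a high 20-bit part for auipc and a signed low 12-bit part for the jalr immediate; a minimal sketch of that split:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Split a pc-relative distance into the auipc (hi20) and jalr/addi (lo12)
// immediates. Rounding by +0x800 keeps lo12 inside the signed 12-bit range.
void split_pc_relative(int64_t distance, int32_t* hi20, int32_t* lo12) {
  assert(distance >= INT32_MIN && distance <= INT32_MAX - 0x800);
  *hi20 = (int32_t)((distance + 0x800) >> 12);            // goes into auipc
  *lo12 = (int32_t)(distance - ((int64_t)*hi20 << 12));   // goes into jalr
  assert(*lo12 >= -2048 && *lo12 <= 2047);
}

int main() {
  int32_t hi = 0, lo = 0;
  split_pc_relative(0x12345, &hi, &lo);
  printf("hi20=%d lo12=%d\n", hi, lo);  // recombines as (hi << 12) + lo
  return 0;
}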
++// ++// Arguments: ++// x10: exception oop ++// x13: exception pc ++// ++// Results: ++// x10: exception oop ++// x13: exception pc in caller ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers x10, x13, x12, x14, x15, t0 are not callee saved. ++// + -+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL -+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, dst_pos); -+ __ bgtu(temp, t0, L_failed); ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); + -+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. -+ __ zero_extend(src_pos, src_pos, 32); -+ __ zero_extend(dst_pos, dst_pos, 32); ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ BLOCK_COMMENT("arraycopy_range_checks done"); -+ } ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("exception_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + ++ // TODO check various assumptions made here + // -+ // Generate 'unsafe' array copy stub -+ // Though just as safe as the other stubs, it takes an unscaled -+ // size_t argument instead of an element count. -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - byte count, treated as ssize_t, can be zero -+ // -+ // Examines the alignment of the operands and dispatches -+ // to a long, int, short, or byte copy loop. -+ // -+ address generate_unsafe_copy(const char* name, -+ address byte_copy_entry, -+ address short_copy_entry, -+ address int_copy_entry, -+ address long_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && long_copy_entry != NULL); -+ Label L_long_aligned, L_int_aligned, L_short_aligned; -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); -+ -+ __ orr(t0, s, d); -+ __ orr(t0, t0, count); -+ -+ __ andi(t0, t0, BytesPerLong - 1); -+ __ beqz(t0, L_long_aligned); -+ __ andi(t0, t0, BytesPerInt - 1); -+ __ beqz(t0, L_int_aligned); -+ __ andi(t0, t0, 1); -+ __ beqz(t0, L_short_aligned); -+ __ j(RuntimeAddress(byte_copy_entry)); ++ // make sure we do so before running this + -+ __ BIND(L_short_aligned); -+ __ srli(count, count, LogBytesPerShort); // size => short_count -+ __ j(RuntimeAddress(short_copy_entry)); -+ __ BIND(L_int_aligned); -+ __ srli(count, count, LogBytesPerInt); // size => int_count -+ __ j(RuntimeAddress(int_copy_entry)); -+ __ BIND(L_long_aligned); -+ __ srli(count, count, LogBytesPerLong); // size => long_count -+ __ j(RuntimeAddress(long_copy_entry)); ++ address start = __ pc(); + -+ return start; -+ } ++ // push fp and retaddr by hand ++ // Exception pc is 'return address' for stack walker ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp)); ++ // there are no callee save registers and we don't expect an ++ // arg reg save area ++#ifndef 
PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); + ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. + // -+ // Generate generic array copy stubs -+ // -+ // Input: -+ // c_rarg0 - src oop -+ // c_rarg1 - src_pos (32-bits) -+ // c_rarg2 - dst oop -+ // c_rarg3 - dst_pos (32-bits) -+ // c_rarg4 - element count (32-bits) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) + // -+ address generate_generic_copy(const char* name, -+ address byte_copy_entry, address short_copy_entry, -+ address int_copy_entry, address oop_copy_entry, -+ address long_copy_entry, address checkcast_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && oop_copy_entry != NULL && -+ long_copy_entry != NULL && checkcast_copy_entry != NULL); -+ Label L_failed, L_failed_0, L_objArray; -+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; -+ -+ // Input registers -+ const Register src = c_rarg0; // source array oop -+ const Register src_pos = c_rarg1; // source position -+ const Register dst = c_rarg2; // destination array oop -+ const Register dst_pos = c_rarg3; // destination position -+ const Register length = c_rarg4; -+ -+ __ align(CodeEntryAlignment); -+ -+ StubCodeMark mark(this, "StubRoutines", name); ++ // n.b. 1 gp arg, 0 fp args, integral return type + -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; ++ // the stack should always be aligned ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, noreg, the_pc, t0); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); ++ __ jalr(x1, t0, offset); + -+ address start = __ pc(); + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. + -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. + -+ //----------------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the following conditions are met: -+ // -+ // (1) src and dst must not be null. -+ // (2) src_pos must not be negative. -+ // (3) dst_pos must not be negative. -+ // (4) length must not be negative. -+ // (5) src klass and dst klass should be the same and not NULL. -+ // (6) src and dst should be arrays. 
-+ // (7) src_pos + length must not exceed length of src. -+ // (8) dst_pos + length must not exceed length of dst. -+ // ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); + -+ // if [src == NULL] then return -1 -+ __ beqz(src, L_failed); ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // if [src_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, src_pos, 1UL << 31); -+ __ bnez(t0, L_failed); ++ __ reset_last_Java_frame(false); + -+ // if [dst == NULL] then return -1 -+ __ beqz(dst, L_failed); ++ // Restore callee-saved registers + -+ // if [dst_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, dst_pos, 1UL << 31); -+ __ bnez(t0, L_failed); ++ // fp is an implicitly saved callee saved register (i.e. the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. ++ // and we dont' expect an arg reg save area ++ __ ld(fp, Address(sp)); ++ __ ld(x13, Address(sp, wordSize)); ++ __ addi(sp, sp , 2 * wordSize); + -+ // registers used as temp -+ const Register scratch_length = x28; // elements count to copy -+ const Register scratch_src_klass = x29; // array klass -+ const Register lh = x30; // layout helper ++ // x10: exception handler + -+ // if [length < 0] then return -1 -+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) -+ // i.e. sign bit set -+ __ andi(t0, scratch_length, 1UL << 31); -+ __ bnez(t0, L_failed); ++ // We have a handler in x10 (could be deopt blob). ++ __ mv(t0, x10); + -+ __ load_klass(scratch_src_klass, src); ++ // Get the exception oop ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert klasses not null {"); -+ Label L1, L2; -+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL -+ __ bind(L1); -+ __ stop("broken null klass"); -+ __ bind(L2); -+ __ load_klass(t0, dst); -+ __ beqz(t0, L1); // this would be broken also -+ BLOCK_COMMENT("} assert klasses not null done"); -+ } ++ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); +#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + -+ // Load layout helper (32-bits) -+ // -+ // |array_tag| | header_size | element_type | |log2_element_size| -+ // 32 30 24 16 8 2 0 -+ // -+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 -+ // -+ -+ const int lh_offset = in_bytes(Klass::layout_helper_offset()); -+ -+ // Handle objArrays completely differently... -+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); -+ __ lw(lh, Address(scratch_src_klass, lh_offset)); -+ __ mvw(t0, objArray_lh); -+ __ beq(lh, t0, L_objArray); ++ // x10: exception oop ++ // t0: exception handler ++ // x14: exception pc ++ // Jump to handler + -+ // if [src->klass() != dst->klass()] then return -1 -+ __ load_klass(t1, dst); -+ __ bne(t1, scratch_src_klass, L_failed); ++ __ jr(t0); + -+ // if [src->is_Array() != NULL] then return -1 -+ // i.e. (lh >= 0) -+ __ andi(t0, lh, 1UL << 31); -+ __ beqz(t0, L_failed); ++ // Make sure all code is generated ++ masm->flush(); + -+ // At this point, it is known to be a typeArray (array_tag 0x3). 
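The old hunk above decodes the Klass layout helper with shift/mask arithmetic following the field diagram in its comment (array tag in the top two bits, then header size, element type, and log2 element size). A small stand-alone sketch of that decoding, using the positions from the diagram; the authoritative shifts and masks are the Klass::_lh_* constants, and the example value below is made up:

#include <cstdint>
#include <cstdio>

struct LayoutHelper {
  unsigned tag;                // 0x3 = typeArray, 0x2 = objArray, 0x0 = non-array
  unsigned header_size;        // array header size in bytes
  unsigned element_type;       // BasicType of the elements
  unsigned log2_element_size;  // 0..3 for 1/2/4/8-byte elements
};

LayoutHelper decode_layout_helper(int32_t lh) {
  LayoutHelper d;
  d.tag               = ((uint32_t)lh >> 30) & 0x3;
  d.header_size       = ((uint32_t)lh >> 16) & 0xFF;
  d.element_type      = ((uint32_t)lh >>  8) & 0xFF;
  d.log2_element_size = (uint32_t)lh & 0xFF;
  return d;
}

int main() {
  // Hypothetical typeArray helper: tag 0x3, 16-byte header, 4-byte elements.
  int32_t lh = (int32_t)((0x3u << 30) | (16u << 16) | 2u);
  LayoutHelper d = decode_layout_helper(lh);
  printf("tag=%u header=%u log2_elem=%u\n", d.tag, d.header_size, d.log2_element_size);
  return 0;
}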
-+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert primitive array {"); -+ Label L; -+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); -+ __ bge(lh, t1, L); -+ __ stop("must be a primitive array"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert primitive array done"); -+ } -+#endif ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +new file mode 100644 +index 00000000000..b3fdd04db1b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -0,0 +1,3864 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#include "utilities/powerOfTwo.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + -+ // TypeArrayKlass -+ // -+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) -+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) -+ // ++// Declaration and definition of StubGenerator (no .hpp file). 
++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp + -+ const Register t0_offset = t0; // array offset -+ const Register x22_elsize = lh; // element size ++#undef __ ++#define __ _masm-> + -+ // Get array_header_in_bytes() -+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); -+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; -+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; -+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+ __ add(src, src, t0_offset); // src array offset -+ __ add(dst, dst, t0_offset); // dst array offset -+ BLOCK_COMMENT("choose copy loop based on element size"); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ // next registers should be set before the jump to corresponding stub -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elements count ++// Stub Code definitions + -+ // 'from', 'to', 'count' registers should be set in such order -+ // since they are the same as 'src', 'src_pos', 'dst'. ++class StubGenerator: public StubCodeGenerator { ++ private: + -+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ __ la(t1, ExternalAddress((address)&counter)); ++ __ lwu(t0, Address(t1, 0)); ++ __ addiw(t0, t0, 1); ++ __ sw(t0, Address(t1, 0)); ++ } ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif + -+ // The possible values of elsize are 0-3, i.e. exact_log2(element -+ // size in bytes). We do a simple bitwise binary search. -+ __ BIND(L_copy_bytes); -+ __ andi(t0, x22_elsize, 2); -+ __ bnez(t0, L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_shorts); -+ __ add(from, src, src_pos); // src_addr -+ __ add(to, dst, dst_pos); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(byte_copy_entry)); ++ // Call stubs are used to call Java from C ++ // ++ // Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // c_rarg6: parameter size (in words) int ++ // c_rarg7: thread Thread* ++ // ++ // There is no return from the stub itself as any Java result ++ // is written to result ++ // ++ // we save x1 (ra) as the return PC at the base of the frame and ++ // link x8 (fp) below it as the frame pointer installing sp (x2) ++ // into fp. ++ // ++ // we save x10-x17, which accounts for all the c arguments. ++ // ++ // TODO: strictly do we need to save them all? they are treated as ++ // volatile by C so could we omit saving the ones we are going to ++ // place in global registers (thread? method?) or those we only use ++ // during setup of the Java call? ++ // ++ // we don't need to save x5 which C uses as an indirect result location ++ // return register. 
++ // ++ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as ++ // volatile ++ // ++ // we save x18-x27 which Java uses as temporary registers and C ++ // expects to be callee-save ++ // ++ // so the stub frame looks like this when we enter Java code ++ // ++ // [ return_from_Java ] <--- sp ++ // [ argument word n ] ++ // ... ++ // -22 [ argument word 1 ] ++ // -21 [ saved x27 ] <--- sp_after_call ++ // -20 [ saved x26 ] ++ // -19 [ saved x25 ] ++ // -18 [ saved x24 ] ++ // -17 [ saved x23 ] ++ // -16 [ saved x22 ] ++ // -15 [ saved x21 ] ++ // -14 [ saved x20 ] ++ // -13 [ saved x19 ] ++ // -12 [ saved x18 ] ++ // -11 [ saved x9 ] ++ // -10 [ call wrapper (x10) ] ++ // -9 [ result (x11) ] ++ // -8 [ result type (x12) ] ++ // -7 [ method (x13) ] ++ // -6 [ entry point (x14) ] ++ // -5 [ parameters (x15) ] ++ // -4 [ parameter size (x16) ] ++ // -3 [ thread (x17) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] ++ // 0 [ ] <--- fp == saved sp (x2) + -+ __ BIND(L_copy_shorts); -+ __ shadd(from, src_pos, src, t0, 1); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(short_copy_entry)); ++ // Call stub stack layout word offsets from fp ++ enum call_stub_layout { ++ sp_after_call_off = -21, + -+ __ BIND(L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_longs); -+ __ shadd(from, src_pos, src, t0, 2); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(int_copy_entry)); ++ x27_off = -21, ++ x26_off = -20, ++ x25_off = -19, ++ x24_off = -18, ++ x23_off = -17, ++ x22_off = -16, ++ x21_off = -15, ++ x20_off = -14, ++ x19_off = -13, ++ x18_off = -12, ++ x9_off = -11, ++ ++ call_wrapper_off = -10, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ const Address sp_after_call (fp, sp_after_call_off * wordSize); ++ ++ const Address call_wrapper (fp, call_wrapper_off * wordSize); ++ const Address result (fp, result_off * wordSize); ++ const Address result_type (fp, result_type_off * wordSize); ++ const Address method (fp, method_off * wordSize); ++ const Address entry_point (fp, entry_point_off * wordSize); ++ const Address parameters (fp, parameters_off * wordSize); ++ const Address parameter_size(fp, parameter_size_off * wordSize); ++ ++ const Address thread (fp, thread_off * wordSize); ++ ++ const Address x27_save (fp, x27_off * wordSize); ++ const Address x26_save (fp, x26_off * wordSize); ++ const Address x25_save (fp, x25_off * wordSize); ++ const Address x24_save (fp, x24_off * wordSize); ++ const Address x23_save (fp, x23_off * wordSize); ++ const Address x22_save (fp, x22_off * wordSize); ++ const Address x21_save (fp, x21_off * wordSize); ++ const Address x20_save (fp, x20_off * wordSize); ++ const Address x19_save (fp, x19_off * wordSize); ++ const Address x18_save (fp, x18_off * wordSize); ++ ++ const Address x9_save (fp, x9_off * wordSize); ++ ++ // stub code ++ ++ address riscv_entry = __ pc(); ++ ++ // 
set up frame and move sp to end of save area ++ __ enter(); ++ __ addi(sp, fp, sp_after_call_off * wordSize); ++ ++ // save register parameters and Java temporary/global registers ++ // n.b. we save thread even though it gets installed in ++ // xthread because we want to sanity check tp later ++ __ sd(c_rarg7, thread); ++ __ sw(c_rarg6, parameter_size); ++ __ sd(c_rarg5, parameters); ++ __ sd(c_rarg4, entry_point); ++ __ sd(c_rarg3, method); ++ __ sd(c_rarg2, result_type); ++ __ sd(c_rarg1, result); ++ __ sd(c_rarg0, call_wrapper); ++ ++ __ sd(x9, x9_save); ++ ++ __ sd(x18, x18_save); ++ __ sd(x19, x19_save); ++ __ sd(x20, x20_save); ++ __ sd(x21, x21_save); ++ __ sd(x22, x22_save); ++ __ sd(x23, x23_save); ++ __ sd(x24, x24_save); ++ __ sd(x25, x25_save); ++ __ sd(x26, x26_save); ++ __ sd(x27, x27_save); ++ ++ // install Java thread in global register now we have saved ++ // whatever value it held ++ __ mv(xthread, c_rarg7); ++ ++ // And method ++ __ mv(xmethod, c_rarg3); ++ ++ // set up the heapbase register ++ __ reinit_heapbase(); + -+ __ BIND(L_copy_longs); +#ifdef ASSERT ++ // make sure we have no pending exceptions + { -+ BLOCK_COMMENT("assert long copy {"); + Label L; -+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize -+ __ addw(lh, lh, zr); -+ __ mvw(t0, LogBytesPerLong); -+ __ beq(x22_elsize, t0, L); -+ __ stop("must be long copy, but elsize is wrong"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert long copy done"); ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ BIND(L); + } +#endif -+ __ shadd(from, src_pos, src, t0, 3); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(long_copy_entry)); -+ -+ // ObjArrayKlass -+ __ BIND(L_objArray); -+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ // pass parameters if any ++ __ mv(esp, sp); ++ __ slli(t0, c_rarg6, LogBytesPerWord); ++ __ sub(t0, sp, t0); // Move SP out of the way ++ __ andi(sp, t0, -2 * wordSize); + -+ Label L_plain_copy, L_checkcast_copy; -+ // test array classes for subtyping -+ __ load_klass(t2, dst); -+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ // parameter count is still in c_rarg6 ++ // and parameter pointer identifying param 1 is in c_rarg5 ++ __ beqz(c_rarg6, parameters_done); + -+ // Identically typed arrays can be copied without element-wise checks. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); ++ address loop = __ pc(); ++ __ ld(t0, c_rarg5, 0); ++ __ addi(c_rarg5, c_rarg5, wordSize); ++ __ addi(c_rarg6, c_rarg6, -1); ++ __ push_reg(t0); ++ __ bgtz(c_rarg6, loop); + -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, scratch_length, zr); // length -+ __ BIND(L_plain_copy); -+ __ j(RuntimeAddress(oop_copy_entry)); ++ __ BIND(parameters_done); + -+ __ BIND(L_checkcast_copy); -+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) -+ { -+ // Before looking at dst.length, make sure dst is also an objArray. 
-+ __ lwu(t0, Address(t2, lh_offset)); -+ __ mvw(t1, objArray_lh); -+ __ bne(t0, t1, L_failed); ++ // call Java entry -- passing methdoOop, and current sp ++ // xmethod: Method* ++ // x30: sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ mv(x30, sp); ++ __ jalr(c_rarg4); + -+ // It is safe to examine both src.length and dst.length. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t2, L_failed); ++ // save current address for use by exception handling code + -+ __ load_klass(dst_klass, dst); // reload ++ return_address = __ pc(); + -+ // Marshal the base address arguments now, freeing registers. -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, length, zr); // length (reloaded) -+ const Register sco_temp = c_rarg3; // this register is free now -+ assert_different_registers(from, to, count, sco_temp, -+ dst_klass, scratch_src_klass); ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ // n.b. this assumes Java returns an integral result in x10 ++ // and a floating result in j_farg0 ++ __ ld(j_rarg2, result); ++ Label is_long, is_float, is_double, exit; ++ __ ld(j_rarg1, result_type); ++ __ li(t0, (u1)T_OBJECT); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_LONG); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_FLOAT); ++ __ beq(j_rarg1, t0, is_float); ++ __ li(t0, (u1)T_DOUBLE); ++ __ beq(j_rarg1, t0, is_double); + -+ // Generate the type check. -+ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ // handle T_INT case ++ __ sw(x10, Address(j_rarg2)); + -+ // Smashes t0, t1 -+ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); ++ __ BIND(exit); + -+ // Fetch destination element klass from the ObjArrayKlass header. -+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); -+ __ ld(dst_klass, Address(dst_klass, ek_offset)); -+ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ // pop parameters ++ __ addi(esp, fp, sp_after_call_off * wordSize); + -+ // the checkcast_copy loop needs two extra arguments: -+ assert(c_rarg3 == sco_temp, "#3 already in place"); -+ // Set up arguments for checkcast_copy_entry. 
-+ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass -+ __ j(RuntimeAddress(checkcast_copy_entry)); ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ BIND(S); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ BIND(L); + } ++#endif + -+ __ BIND(L_failed); -+ __ mv(x10, -1); -+ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ // restore callee-save registers ++ __ ld(x27, x27_save); ++ __ ld(x26, x26_save); ++ __ ld(x25, x25_save); ++ __ ld(x24, x24_save); ++ __ ld(x23, x23_save); ++ __ ld(x22, x22_save); ++ __ ld(x21, x21_save); ++ __ ld(x20, x20_save); ++ __ ld(x19, x19_save); ++ __ ld(x18, x18_save); ++ ++ __ ld(x9, x9_save); ++ ++ __ ld(c_rarg0, call_wrapper); ++ __ ld(c_rarg1, result); ++ __ ld(c_rarg2, result_type); ++ __ ld(c_rarg3, method); ++ __ ld(c_rarg4, entry_point); ++ __ ld(c_rarg5, parameters); ++ __ ld(c_rarg6, parameter_size); ++ __ ld(c_rarg7, thread); ++ ++ // leave frame and return to caller ++ __ leave(); + __ ret(); + ++ // handle return types different from T_INT ++ ++ __ BIND(is_long); ++ __ sd(x10, Address(j_rarg2, 0)); ++ __ j(exit); ++ ++ __ BIND(is_float); ++ __ fsw(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ ++ __ BIND(is_double); ++ __ fsd(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ + return start; + } + ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. + // -+ // Generate stub for array fill. If "aligned" is true, the -+ // "to" address is assumed to be heapword aligned. -+ // -+ // Arguments for generated stub: -+ // to: c_rarg0 -+ // value: c_rarg1 -+ // count: c_rarg2 treated as signed ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. 
+ // -+ address generate_fill(BasicType t, bool aligned, const char* name) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); ++ // x10: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + -+ BLOCK_COMMENT("Entry:"); ++ // same as in generate_call_stub(): ++ const Address thread(fp, thread_off * wordSize); + -+ const Register to = c_rarg0; // source array address -+ const Register value = c_rarg1; // value -+ const Register count = c_rarg2; // elements count ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ bind(S); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif + -+ const Register bz_base = x28; // base for block_zero routine -+ const Register cnt_words = x29; // temp register -+ const Register tmp_reg = t1; ++ // set pending exception ++ __ verify_oop(x10); + -+ __ enter(); ++ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ mv(t0, (address)__FILE__); ++ __ sd(t0, Address(xthread, Thread::exception_file_offset())); ++ __ mv(t0, (int)__LINE__); ++ __ sw(t0, Address(xthread, Thread::exception_line_offset())); + -+ Label L_fill_elements, L_exit1; ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ j(StubRoutines::_call_stub_return_address); + -+ int shift = -1; -+ switch (t) { -+ case T_BYTE: -+ shift = 0; ++ return start; ++ } + -+ // Zero extend value -+ // 8 bit -> 16 bit -+ __ andi(value, value, 0xff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 8); -+ __ orr(value, value, tmp_reg); -+ -+ // 16 bit -> 32 bit -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 16); -+ __ orr(value, value, tmp_reg); ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // x10: exception ++ // x13: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be in RA !! + -+ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ case T_SHORT: -+ shift = 1; -+ // Zero extend value -+ // 16 bit -> 32 bit -+ __ andi(value, value, 0xffff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 16); -+ __ orr(value, value, tmp_reg); ++ // NOTE: this is always used as a jump target within generated code ++ // so it just needs to be generated code with no x86 prolog + -+ // Short arrays (< 8 bytes) fill by element -+ __ mv(tmp_reg, 8 >> shift); -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ case T_INT: -+ shift = 2; ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); + -+ // Short arrays (< 8 bytes) fill by element -+ __ mv(tmp_reg, 8 >> shift); -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ default: ShouldNotReachHere(); -+ } ++ // Upon entry, RA points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. 
++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. + -+ // Align source address at 8 bytes address boundary. -+ Label L_skip_align1, L_skip_align2, L_skip_align4; -+ if (!aligned) { -+ switch (t) { -+ case T_BYTE: -+ // One byte misalignment happens only for byte arrays. -+ __ andi(t0, to, 1); -+ __ beqz(t0, L_skip_align1); -+ __ sb(value, Address(to, 0)); -+ __ addi(to, to, 1); -+ __ addiw(count, count, -1); -+ __ bind(L_skip_align1); -+ // Fallthrough -+ case T_SHORT: -+ // Two bytes misalignment happens only for byte and short (char) arrays. -+ __ andi(t0, to, 2); -+ __ beqz(t0, L_skip_align2); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ addiw(count, count, -(2 >> shift)); -+ __ bind(L_skip_align2); -+ // Fallthrough -+ case T_INT: -+ // Align to 8 bytes, we know we are 4 byte aligned to start. -+ __ andi(t0, to, 4); -+ __ beqz(t0, L_skip_align4); -+ __ sw(value, Address(to, 0)); -+ __ addi(to, to, 4); -+ __ addiw(count, count, -(4 >> shift)); -+ __ bind(L_skip_align4); -+ break; -+ default: ShouldNotReachHere(); -+ } ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); + } ++#endif + -+ // -+ // Fill large chunks -+ // -+ __ srliw(cnt_words, count, 3 - shift); // number of words ++ // compute exception handler into x9 + -+ // 32 bit -> 64 bit -+ __ andi(value, value, 0xffffffff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 32); -+ __ orr(value, value, tmp_reg); ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in x10 and caller pc (ret address) ++ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on ++ // the stack. ++ __ mv(c_rarg1, ra); ++ // ra will be trashed by the VM call so we move it to x9 ++ // (callee-saved) because we also need to pass it to the handler ++ // returned by this call. ++ __ mv(x9, ra); ++ BLOCK_COMMENT("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, c_rarg1); ++ // we should not really care that ra is no longer the callee ++ // address. we saved the value the handler needs in x9 so we can ++ // just copy it to x13. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore ra here to satisfy that assert. ++ __ mv(ra, x9); ++ // setup x10 & x13 & clear pending exception ++ __ mv(x13, x9); ++ __ mv(x9, x10); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + -+ __ slli(tmp_reg, cnt_words, 3 - shift); -+ __ subw(count, count, tmp_reg); ++#ifdef ASSERT ++ // make sure exception is set + { -+ __ fill_words(to, cnt_words, value); ++ Label L; ++ __ bnez(x10, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); + } ++#endif + -+ // Remaining count is less than 8 bytes. Fill it by a single store. -+ // Note that the total length is no less than 8 bytes. 
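The fill stub above widens the fill value by OR-ing shifted copies of itself (8 to 16, 16 to 32, 32 to 64 bits) so it can store a whole word per iteration. A minimal C-level sketch of that widening, mirroring the slli/orr sequences in the hunk (illustrative only):

#include <cstdint>

// Replicate a byte/short/int fill pattern across a 64-bit word.
// log2_element_size: 0 = byte, 1 = short, 2 = int.
uint64_t splat_fill_value(uint64_t value, int log2_element_size) {
  if (log2_element_size == 0) {   // byte: 8 -> 16 bits
    value &= 0xff;
    value |= value << 8;
  }
  if (log2_element_size <= 1) {   // byte/short: 16 -> 32 bits
    value &= 0xffff;
    value |= value << 16;
  }
  value &= 0xffffffff;            // all element sizes: 32 -> 64 bits
  value |= value << 32;
  return value;
}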
-+ if (t == T_BYTE || t == T_SHORT) { -+ __ beqz(count, L_exit1); -+ __ shadd(to, count, to, tmp_reg, shift); // points to the end -+ __ sd(value, Address(to, -8)); // overwrite some elements -+ __ bind(L_exit1); -+ __ leave(); -+ __ ret(); -+ } ++ // continue at exception handler ++ // x10: exception ++ // x13: throwing pc ++ // x9: exception handler ++ __ verify_oop(x10); ++ __ jr(x9); + -+ // Handle copies less than 8 bytes. -+ Label L_fill_2, L_fill_4, L_exit2; -+ __ bind(L_fill_elements); -+ switch (t) { -+ case T_BYTE: -+ __ andi(t0, count, 1); -+ __ beqz(t0, L_fill_2); -+ __ sb(value, Address(to, 0)); -+ __ addi(to, to, 1); -+ __ bind(L_fill_2); -+ __ andi(t0, count, 2); -+ __ beqz(t0, L_fill_4); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ bind(L_fill_4); -+ __ andi(t0, count, 4); -+ __ beqz(t0, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ case T_SHORT: -+ __ andi(t0, count, 1); -+ __ beqz(t0, L_fill_4); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ bind(L_fill_4); -+ __ andi(t0, count, 2); -+ __ beqz(t0, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ case T_INT: -+ __ beqz(count, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ default: ShouldNotReachHere(); -+ } -+ __ bind(L_exit2); -+ __ leave(); -+ __ ret(); + return start; + } + -+ void generate_arraycopy_stubs() { -+ address entry = NULL; -+ address entry_jbyte_arraycopy = NULL; -+ address entry_jshort_arraycopy = NULL; -+ address entry_jint_arraycopy = NULL; -+ address entry_oop_arraycopy = NULL; -+ address entry_jlong_arraycopy = NULL; -+ address entry_checkcast_arraycopy = NULL; -+ -+ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); ++ // Non-destructive plausibility checks for oops ++ // ++ // Arguments: ++ // x10: oop to verify ++ // t0: error message ++ // ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved ra ++ // [tos + 3]: saved t1 ++ // [tos + 4]: saved x10 ++ // [tos + 5]: saved t0 ++ address generate_verify_oop() { + -+ //*** jbyte -+ // Always need aligned and unaligned versions -+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, -+ "jbyte_disjoint_arraycopy"); -+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, -+ &entry_jbyte_arraycopy, -+ "jbyte_arraycopy"); -+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, -+ "arrayof_jbyte_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, -+ "arrayof_jbyte_arraycopy"); ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); + -+ //*** jshort -+ // Always need aligned and unaligned versions -+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, -+ "jshort_disjoint_arraycopy"); -+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, -+ &entry_jshort_arraycopy, -+ "jshort_arraycopy"); -+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, -+ "arrayof_jshort_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, -+ "arrayof_jshort_arraycopy"); ++ Label exit, error; + -+ //*** jint -+ // Aligned versions -+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, -+ "arrayof_jint_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jint_arraycopy = 
generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, -+ "arrayof_jint_arraycopy"); -+ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. -+ // entry_jint_arraycopy always points to the unaligned version -+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, -+ "jint_disjoint_arraycopy"); -+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, -+ &entry_jint_arraycopy, -+ "jint_arraycopy"); ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + -+ //*** jlong -+ // It is always aligned -+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, -+ "arrayof_jlong_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, -+ "arrayof_jlong_arraycopy"); -+ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; -+ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; ++ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); ++ __ ld(c_rarg3, Address(c_rarg2)); ++ __ add(c_rarg3, c_rarg3, 1); ++ __ sd(c_rarg3, Address(c_rarg2)); + -+ //*** oops -+ { -+ // With compressed oops we need unaligned versions; notice that -+ // we overwrite entry_oop_arraycopy. -+ bool aligned = !UseCompressedOops; ++ // object is in x10 ++ // make sure object is 'reasonable' ++ __ beqz(x10, exit); // if obj is NULL it is OK + -+ StubRoutines::_arrayof_oop_disjoint_arraycopy -+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", -+ /*dest_uninitialized*/false); -+ StubRoutines::_arrayof_oop_arraycopy -+ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", -+ /*dest_uninitialized*/false); -+ // Aligned versions without pre-barriers -+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit -+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", -+ /*dest_uninitialized*/true); -+ StubRoutines::_arrayof_oop_arraycopy_uninit -+ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", -+ /*dest_uninitialized*/true); ++#if INCLUDE_ZGC ++ if (UseZGC) { ++ // Check if mask is good. ++ // verifies that ZAddressBadMask & x10 == 0 ++ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ bnez(c_rarg2, error); + } ++#endif + -+ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; -+ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; -+ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; -+ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; ++ // Check if the oop is in the right area of memory ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + -+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); -+ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, -+ /*dest_uninitialized*/true); ++ // Compare c_rarg2 and c_rarg3. ++ __ bne(c_rarg2, c_rarg3, error); + ++ // make sure klass is 'reasonable', which is not zero. 
++ __ load_klass(x10, x10); // get klass ++ __ beqz(x10, error); // if klass is NULL it is broken + -+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", -+ entry_jbyte_arraycopy, -+ entry_jshort_arraycopy, -+ entry_jint_arraycopy, -+ entry_jlong_arraycopy); ++ // return if everything seems ok ++ __ bind(exit); + -+ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", -+ entry_jbyte_arraycopy, -+ entry_jshort_arraycopy, -+ entry_jint_arraycopy, -+ entry_oop_arraycopy, -+ entry_jlong_arraycopy, -+ entry_checkcast_arraycopy); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ __ ret(); + -+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); -+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); -+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); -+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); -+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); -+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ // handle errors ++ __ bind(error); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) ++ __ mv(c_rarg0, t0); // pass address of error message ++ __ mv(c_rarg1, ra); // pass return address ++ __ mv(c_rarg2, sp); // pass address of regs on stack ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); ++ __ jalr(x1, t0, offset); ++ __ ebreak(); ++ ++ return start; + } + -+ // Safefetch stubs. -+ void generate_safefetch(const char* name, int size, address* entry, -+ address* fault_pc, address* continuation_pc) { -+ // safefetch signatures: -+ // int SafeFetch32(int* adr, int errValue) -+ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) -+ // -+ // arguments: -+ // c_rarg0 = adr -+ // c_rarg1 = errValue -+ // -+ // result: -+ // PPC_RET = *adr or errValue -+ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); -+ StubCodeMark mark(this, "StubRoutines", name); ++ // The inner part of zero_words(). ++ // ++ // Inputs: ++ // x28: the HeapWord-aligned base address of an array to zero. ++ // x29: the count in HeapWords, x29 > 0. ++ // ++ // Returns x28 and x29, adjusted for the caller to clear. ++ // x28: the base address of the tail of words left to clear. ++ // x29: the number of words in the tail. ++ // x29 < MacroAssembler::zero_words_block_size. + -+ // Entry point, pc or function descriptor. -+ *entry = __ pc(); ++ address generate_zero_blocks() { ++ Label done; + -+ // Load *adr into c_rarg1, may fault. -+ *fault_pc = __ pc(); -+ switch (size) { -+ case 4: -+ // int32_t -+ __ lw(c_rarg1, Address(c_rarg0, 0)); -+ break; -+ case 8: -+ // int64_t -+ __ ld(c_rarg1, Address(c_rarg0, 0)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ const Register base = x28, cnt = x29; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); ++ address start = __ pc(); ++ ++ { ++ // Clear the remaining blocks. 
++ Label loop; ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bltz(cnt, done); ++ __ bind(loop); ++ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { ++ __ sd(zr, Address(base, 0)); ++ __ add(base, base, 8); ++ } ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bgez(cnt, loop); ++ __ bind(done); ++ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); + } + -+ // return errValue or *adr -+ *continuation_pc = __ pc(); -+ __ mv(x10, c_rarg1); + __ ret(); -+ } + -+#ifdef COMPILER2 -+ // code for comparing 16 bytes of strings with same encoding -+ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) { -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; -+ __ ld(tmp5, Address(str1)); -+ __ addi(str1, str1, wordSize); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ ld(cnt1, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ bnez(tmp4, DIFF1); -+ __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); -+ __ xorr(tmp4, tmp5, cnt1); -+ __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ bnez(tmp4, DIFF2); ++ return start; + } + -+ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding -+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) { -+ const Register tmp = x30; -+ __ ld(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp, tmpL); -+ __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; + -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_hi32(tmp, tmpL); -+ __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); -+ } ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; + -+ // x10 = result -+ // x11 = str1 -+ // x12 = cnt1 -+ // x13 = str2 -+ // x14 = cnt2 -+ // x28 = tmp1 -+ // x29 = tmp2 -+ // x30 = tmp3 -+ address generate_compare_long_string_different_encoding(bool isLU) { ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ ++ const Register stride = x30; ++ ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); ++ ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); + __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); -+ address entry = __ pc(); -+ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE; -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, -+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; -+ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ __ bind(start); + -+ // cnt2 == amount of characters left to compare -+ // Check already loaded first 4 symbols -+ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); -+ __ mv(isLU ? tmp1 : tmp2, tmp3); -+ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); -+ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); -+ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols. -+ __ push_reg(spilled_regs, sp); ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } + -+ __ xorr(tmp3, tmp1, tmp2); -+ __ mv(tmp5, tmp2); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ Register strU = isLU ? str2 : str1, -+ strL = isLU ? str1 : str2, -+ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison -+ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif + -+ // make sure main loop is byte-aligned, we should load another 4 bytes from strL -+ __ beqz(cnt2, DONE); // no characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, -wordSize / 2); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); ++ ++ __ sub(count, count, 16); ++ __ bltz(count, drain); ++ ++ __ bind(again); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); ++ ++ __ sub(count, count, 8); ++ __ bgez(count, again); ++ ++ // Drain ++ __ bind(drain); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ __ beqz(cnt2, DONE); // no character left -+ __ sub(cnt2, cnt2, wordSize * 2); -+ __ bltz(cnt2, TAIL); 
-+ __ bind(SMALL_LOOP); // smaller loop -+ __ sub(cnt2, cnt2, wordSize * 2); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ bgez(cnt2, SMALL_LOOP); -+ __ addi(t0, cnt2, wordSize * 2); -+ __ beqz(t0, DONE); -+ __ bind(TAIL); // 1..15 characters left -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ Label LOAD_LAST, WORD_CMP; -+ __ addi(t0, cnt2, wordSize); -+ __ bgtz(t0, LOAD_LAST); -+ // remaining characters is greater than or equals to 8, we can do one compare_string_8_x_LU -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ addi(cnt2, cnt2, wordSize); -+ __ beqz(cnt2, DONE); // no character left -+ __ bind(LOAD_LAST); // 1..7 characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ addi(t0, cnt2, wordSize / 2); -+ __ blez(t0, WORD_CMP); -+ __ slli(t0, t0, 1); // now in bytes -+ __ slli(t0, t0, LogBitsPerByte); -+ __ sll(tmpL, tmpL, t0); -+ __ sll(tmpU, tmpU, t0); -+ // remaining characters is greater than or equals to 4, we can do one full 4-byte comparison -+ __ bind(WORD_CMP); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, wordSize / 2); -+ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left -+ __ j(DONE); // no character left ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); ++ ++ __ bind(L1); ++ ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); ++ } ++ ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); + } else { -+ // Unaligned accesses. Load from non-byte aligned address. -+ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string -+ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string -+ // last 16 characters -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ j(DONE); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); + } -+ __ bind(DIFF); -+ __ mv(tmpL, t0); -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ __ bind(CALCULATE_DIFFERENCE); -+ __ ctzc_bit(tmp4, tmp3); -+ __ srl(tmp1, tmp1, tmp4); -+ __ srl(tmp5, tmp5, tmp4); -+ __ andi(tmp1, tmp1, 0xFFFF); -+ __ andi(tmp5, tmp5, 0xFFFF); -+ __ sub(result, tmp1, tmp5); -+ __ bind(DONE); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; ++ __ bind(L2); ++ } ++ ++ __ ret(); + } + -+ // x10 = result -+ // x11 = str1 -+ // x12 = cnt1 -+ // x13 = str2 -+ // x14 = cnt2 -+ // x28 = tmp1 -+ // x29 = tmp2 -+ // x30 = tmp3 -+ // x31 = tmp4 -+ address generate_compare_long_string_same_encoding(bool isLL) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", isLL ? -+ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); -+ address entry = __ pc(); -+ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, -+ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, -+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; -+ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ Label copy_f, copy_b; + -+ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used -+ // update cnt2 counter with already loaded 8 bytes -+ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); -+ // update pointers, because of previous read -+ __ add(str1, str1, wordSize); -+ __ add(str2, str2, wordSize); -+ // less than 16 bytes left? -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); -+ __ push_reg(spilled_regs, sp); -+ __ bltz(cnt2, TAIL); -+ __ bind(SMALL_LOOP); -+ compare_string_16_bytes_same(DIFF, DIFF2); -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); -+ __ bgez(cnt2, SMALL_LOOP); -+ __ bind(TAIL); -+ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); -+ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); -+ __ blez(cnt2, CHECK_LAST); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); -+ __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); -+ __ bind(CHECK_LAST); -+ if (!isLL) { -+ __ add(cnt2, cnt2, cnt2); // now in bytes -+ } -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ __ ld(tmp5, Address(str1)); -+ __ ld(cnt1, Address(str2)); -+ __ neg(cnt2, cnt2); -+ __ slli(cnt2, cnt2, LogBitsPerByte); -+ __ sll(tmp5, tmp5, cnt2); -+ __ sll(cnt1, cnt1, cnt2); -+ } else { -+ // Unaligned access. Load from non-byte aligned address. -+ __ add(str1, str1, cnt2); -+ __ ld(tmp5, Address(str1)); -+ __ add(str2, str2, cnt2); -+ __ ld(cnt1, Address(str2)); -+ } ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. If is_aligned is false, we align the source address. 
++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ + -+ __ xorr(tmp4, tmp5, cnt1); -+ __ beqz(tmp4, LENGTH_DIFF); -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ __ bind(DIFF2); -+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb -+ __ srl(tmp5, tmp5, tmp3); -+ __ srl(cnt1, cnt1, tmp3); -+ if (isLL) { -+ __ andi(tmp5, tmp5, 0xFF); -+ __ andi(cnt1, cnt1, 0xFF); -+ } else { -+ __ andi(tmp5, tmp5, 0xFFFF); -+ __ andi(cnt1, cnt1, 0xFFFF); -+ } -+ __ sub(result, tmp5, cnt1); -+ __ j(LENGTH_DIFF); -+ __ bind(DIFF); -+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb -+ __ srl(tmp1, tmp1, tmp3); -+ __ srl(tmp2, tmp2, tmp3); -+ if (isLL) { -+ __ andi(tmp1, tmp1, 0xFF); -+ __ andi(tmp2, tmp2, 0xFF); -+ } else { -+ __ andi(tmp1, tmp1, 0xFFFF); -+ __ andi(tmp2, tmp2, 0xFFFF); -+ } -+ __ sub(result, tmp1, tmp2); -+ __ j(LENGTH_DIFF); -+ __ bind(LAST_CHECK_AND_LENGTH_DIFF); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ bind(LENGTH_DIFF); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; -+ } ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); + -+ void generate_compare_long_strings() { -+ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); -+ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); -+ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); -+ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); -+ } ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); + -+ // x10 result -+ // x11 src -+ // x12 src count -+ // x13 pattern -+ // x14 pattern count -+ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) -+ { -+ const char* stubName = needle_isL -+ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") -+ : "indexof_linear_uu"; -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", stubName); -+ address entry = __ pc(); ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; + -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ bool isL = needle_isL && haystack_isL; -+ // parameters -+ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; -+ // temporary registers -+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; -+ // redefinitions -+ Register ch1 = x28, ch2 = x29; -+ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); + -+ __ push_reg(spilled_regs, sp); ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } + -+ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, -+ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, -+ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, -+ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, -+ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, -+ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); + -+ __ ld(ch1, Address(needle)); -+ __ ld(ch2, Address(haystack)); -+ // src.length - pattern.length -+ __ sub(haystack_len, haystack_len, needle_len); ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); + -+ // first is needle[0] -+ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); -+ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); -+ __ mul(first, first, mask1); -+ __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); -+ if (needle_isL != haystack_isL) { -+ __ mv(tmp, ch1); -+ } -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); -+ __ blez(haystack_len, L_SMALL); ++ if (is_backward) { ++ __ j(done); + -+ if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); + } -+ // xorr, sub, orr, notr, andr -+ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] -+ // eg: -+ // first: aa aa aa aa aa aa aa aa -+ // ch2: aa aa li nx jd ka aa aa -+ // match_mask: 80 80 00 00 00 00 80 80 -+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ } + -+ // search first char of needle, if success, goto L_HAS_ZERO; -+ __ bnez(match_mask, L_HAS_ZERO); -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); -+ __ add(result, result, wordSize / haystack_chr_size); -+ __ add(haystack, haystack, wordSize); -+ __ bltz(haystack_len, L_POST_LOOP); ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); ++ } + -+ __ bind(L_LOOP); -+ __ ld(ch2, Address(haystack)); -+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); -+ __ bnez(match_mask, L_HAS_ZERO); ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); + -+ __ bind(L_LOOP_PROCEED); -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); -+ __ add(haystack, haystack, wordSize); -+ __ add(result, result, wordSize / haystack_chr_size); -+ __ bgez(haystack_len, L_LOOP); ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; + -+ __ bind(L_POST_LOOP); -+ __ mv(ch2, -wordSize 
/ haystack_chr_size); -+ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check -+ __ ld(ch2, Address(haystack)); -+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); -+ __ neg(haystack_len, haystack_len); -+ __ xorr(ch2, first, ch2); -+ __ sub(match_mask, ch2, mask1); -+ __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set -+ __ j(L_SMALL_PROCEED); ++ Label same_aligned; ++ Label copy8, copy_small, done; + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL); -+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); -+ __ neg(haystack_len, haystack_len); -+ if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ copy_insn ld_arr = NULL, st_arr = NULL; ++ switch (granularity) { ++ case 1 : ++ ld_arr = (copy_insn)&MacroAssembler::lbu; ++ st_arr = (copy_insn)&MacroAssembler::sb; ++ break; ++ case 2 : ++ ld_arr = (copy_insn)&MacroAssembler::lhu; ++ st_arr = (copy_insn)&MacroAssembler::sh; ++ break; ++ case 4 : ++ ld_arr = (copy_insn)&MacroAssembler::lwu; ++ st_arr = (copy_insn)&MacroAssembler::sw; ++ break; ++ case 8 : ++ ld_arr = (copy_insn)&MacroAssembler::ld; ++ st_arr = (copy_insn)&MacroAssembler::sd; ++ break; ++ default : ++ ShouldNotReachHere(); + } -+ __ xorr(ch2, first, ch2); -+ __ sub(match_mask, ch2, mask1); -+ __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set + -+ __ bind(L_SMALL_PROCEED); -+ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. -+ __ notr(ch2, ch2); -+ __ andr(match_mask, match_mask, ch2); -+ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check -+ __ beqz(match_mask, NOMATCH); ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); ++ } + -+ __ bind(L_SMALL_HAS_ZERO_LOOP); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); -+ __ mv(ch2, wordSize / haystack_chr_size); -+ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); -+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); ++ } + -+ __ bind(L_SMALL_CMP_LOOP); -+ __ shadd(first, trailing_zero, needle, first, needle_chr_shift); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); -+ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); -+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); -+ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); -+ __ beq(first, ch2, L_SMALL_CMP_LOOP); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ __ bind(L_SMALL_CMP_LOOP_NOMATCH); -+ __ beqz(match_mask, NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 
7 : 15); -+ __ add(result, result, 1); -+ __ add(haystack, haystack, haystack_chr_size); -+ __ j(L_SMALL_HAS_ZERO_LOOP); ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); -+ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); ++ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); ++ } ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ __ align(OptoLoopAlignment); -+ __ bind(L_HAS_ZERO); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); -+ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); -+ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) -+ __ sub(result, result, 1); // array index from 0, so result -= 1 ++ __ beqz(cnt, done); + -+ __ bind(L_HAS_ZERO_LOOP); -+ __ mv(needle_len, wordSize / haystack_chr_size); -+ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); -+ // load next 8 bytes from haystack, and increase result index -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ add(result, result, 1); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); -+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + -+ // compare one char -+ __ bind(L_CMP_LOOP); -+ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift); -+ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); -+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); // next char index -+ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); -+ __ beq(needle_len, ch2, L_CMP_LOOP); ++ __ bind(done); ++ } + -+ __ bind(L_CMP_LOOP_NOMATCH); -+ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 
7 : 15); -+ __ add(haystack, haystack, haystack_chr_size); -+ __ j(L_HAS_ZERO_LOOP); ++ // Scan over array at a for count oops, verifying each one. ++ // Preserves a and count, clobbers t0 and t1. ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { ++ Label loop, end; ++ __ mv(t1, zr); ++ __ slli(t0, count, exact_log2(size)); ++ __ bind(loop); ++ __ bgeu(t1, t0, end); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_CMP_LOOP_LAST_CMP); -+ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ add(temp, a, t1); ++ if (size == (size_t)wordSize) { ++ __ ld(temp, Address(temp, 0)); ++ __ verify_oop(temp); ++ } else { ++ __ lwu(temp, Address(temp, 0)); ++ __ decode_heap_oop(temp); // calls verify_oop ++ } ++ __ add(t1, t1, size); ++ __ j(loop); ++ __ bind(end); ++ } + -+ __ align(OptoLoopAlignment); -+ __ bind(L_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ add(result, result, 1); -+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_reg = RegSet::of(s, d, count); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_HAS_ZERO_LOOP_NOMATCH); -+ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until -+ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, -+ // so, result was increased at max by wordSize/str2_chr_size - 1, so, -+ // respective high bit wasn't changed. L_LOOP_PROCEED will increase -+ // result by analyzed characters value, so, we can just reset lower bits -+ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL -+ // 2) restore needle_len and haystack_len values from "compressed" haystack_len -+ // 3) advance haystack value to represent next haystack octet. result & 7/3 is -+ // index of last analyzed substring inside current octet. So, haystack in at -+ // respective start address. We need to advance it to next octet -+ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); -+ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); -+ __ andi(result, result, haystack_isL ? 
-8 : -4); -+ __ slli(tmp, match_mask, haystack_chr_shift); -+ __ sub(haystack, haystack, tmp); -+ __ addw(haystack_len, haystack_len, zr); -+ __ j(L_LOOP_PROCEED); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } + -+ __ align(OptoLoopAlignment); -+ __ bind(NOMATCH); -+ __ mv(result, -1); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ __ bind(DONE); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; -+ } ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); + -+ void generate_string_indexof_stubs() -+ { -+ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); -+ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); -+ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); -+ } ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ address generate_mulAdd() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ { ++ // UnsafeCopyMemory page error: continue after ucm ++ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); ++ UnsafeCopyMemoryMark ucmm(this, add_entry, true); ++ copy_memory(aligned, s, d, count, t0, size); ++ } + -+ address start = __ pc(); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } + -+ const Register out = x10; -+ const Register in = x11; -+ const Register offset = x12; -+ const Register len = x13; -+ const Register k = x14; -+ const Register tmp1 = x28; -+ const Register tmp2 = x29; ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); -+ __ mul_add(out, in, offset, len, k, tmp1, tmp2); + __ leave(); ++ __ mv(x10, zr); // return 0 + __ ret(); -+ + return start; + } + -+ /** -+ * Arguments: -+ * -+ * Input: -+ * c_rarg0 - x address -+ * c_rarg1 - x length -+ * c_rarg2 - y address -+ * c_rarg3 - y lenth -+ * c_rarg4 - z address -+ * c_rarg5 - z length -+ */ -+ address generate_multiplyToLen() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_regs = RegSet::of(s, d, count); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); ++ __ enter(); + -+ const Register x = x10; -+ const Register xlen = x11; -+ const Register y = x12; -+ const Register ylen = x13; -+ const Register z = x14; -+ const Register zlen = x15; -+ -+ const Register tmp1 = x16; -+ const Register tmp2 = x17; -+ const Register tmp3 = x7; -+ const Register tmp4 = x28; -+ const Register tmp5 = x29; -+ const Register tmp6 = x30; -+ const Register tmp7 = x31; ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } + -+ RegSet spilled_regs = RegSet::of(tmp1, tmp2); -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ __ push_reg(spilled_regs, sp); -+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); ++ // use fwd copy when (d-s) above_equal (count*size) ++ __ sub(t0, d, s); ++ __ slli(t1, count, exact_log2(size)); ++ __ bgeu(t0, t1, nooverlap_target); + -+ return start; -+ } ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ address generate_squareToLen() -+ { -+ // squareToLen algorithm for sizes 1..127 described in java code works -+ // faster than multiply_to_len on some CPUs and slower on others, but -+ // multiply_to_len shows a bit better overall results -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "squareToLen"); -+ address start = __ pc(); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); + -+ const Register x = x10; -+ const Register xlen = x11; -+ const Register z = x12; -+ const Register zlen = x13; -+ const Register y = x14; // == x -+ const Register ylen = x15; // == xlen ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ const Register tmp1 = x16; -+ const Register tmp2 = x17; -+ const Register tmp3 = x7; -+ const Register tmp4 = x28; -+ const Register tmp5 = x29; -+ const Register tmp6 = x30; -+ const Register tmp7 = x31; ++ { ++ // UnsafeCopyMemory page error: continue after ucm ++ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); ++ UnsafeCopyMemoryMark ucmm(this, add_entry, true); ++ copy_memory(aligned, s, d, count, t0, -size); ++ } + -+ RegSet spilled_regs = RegSet::of(y, tmp2); -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); -+ __ push_reg(spilled_regs, sp); -+ __ mv(y, x); -+ __ mv(ylen, xlen); -+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + __ leave(); ++ __ mv(x10, zr); // return 0 + __ ret(); -+ + return start; + } -+#endif // COMPILER2 + -+ // Continuation 
point for throwing of implicit exceptions that are -+ // not handled in the current activation. Fabricates an exception -+ // oop and initiates normal exception dispatching in this -+ // frame. Since we need to preserve callee-saved values (currently -+ // only for C2, but done for C1 as well) we need a callee-saved oop -+ // map and therefore have to make these stubs into RuntimeStubs -+ // rather than BufferBlobs. If the compiler needs all registers to -+ // be preserved between the fault point and the exception handler -+ // then it must assume responsibility for that in -+ // AbstractCompiler::continuation_for_implicit_null_exception or -+ // continuation_for_implicit_division_by_zero_exception. All other -+ // implicit exceptions (e.g., NullPointerException or -+ // AbstractMethodError on entry) are either at call sites or -+ // otherwise assume that stack unwinding will be initiated, so -+ // caller saved registers were assumed volatile in the compiler. ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); ++ } + -+#undef __ -+#define __ masm-> ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ address generate_throw_exception(const char* name, -+ address runtime_entry, -+ Register arg1 = noreg, -+ Register arg2 = noreg) { -+ // Information about frame layout at time of blocking runtime call. -+ // Note that we only have to preserve callee-saved registers since -+ // the compilers are responsible for supplying a continuation point -+ // if they expect all registers to be preserved. -+ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 -+ assert_cond(runtime_entry != NULL); -+ enum layout { -+ fp_off = 0, -+ fp_off2, -+ return_off, -+ return_off2, -+ framesize // inclusive of return address -+ }; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); ++ } + -+ const int insts_size = 512; -+ const int locs_size = 64; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ CodeBuffer code(name, insts_size, locs_size); -+ OopMapSet* oop_maps = new OopMapSet(); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ assert_cond(oop_maps != NULL && masm != NULL); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); ++ } + -+ address start = __ pc(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. 
The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // This is an inlined and slightly modified version of call_VM -+ // which has the ability to fetch the return PC out of -+ // thread-local storage and also sets up last_Java_sp slightly -+ // differently than the real call_VM + -+ __ enter(); // Save FP and RA before call ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); ++ } + -+ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_long_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // ra and fp are already in place -+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_oop_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); ++ } + -+ int frame_complete = __ pc() - start; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_oop_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? 
sizeof (jint) : sizeof (jlong); ++ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, ++ name, dest_uninitialized); ++ } + -+ // Set up last_Java_sp and last_Java_fp -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ // Helper for generating a dynamic type check. ++ // Smashes t0, t1. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); + -+ // Call runtime -+ if (arg1 != noreg) { -+ assert(arg2 != c_rarg1, "clobbered"); -+ __ mv(c_rarg1, arg1); -+ } -+ if (arg2 != noreg) { -+ __ mv(c_rarg2, arg2); -+ } -+ __ mv(c_rarg0, xthread); -+ BLOCK_COMMENT("call runtime_entry"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, runtime_entry, offset); -+ __ jalr(x1, t0, offset); ++ BLOCK_COMMENT("type_check:"); + -+ // Generate oop map -+ OopMap* map = new OopMap(framesize, 0); -+ assert_cond(map != NULL); ++ Label L_miss; + -+ oop_maps->add_gc_map(the_pc - start, map); ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + -+ __ reset_last_Java_frame(true); ++ // Fall through on failure! ++ __ BIND(L_miss); ++ } + -+ __ leave(); ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // c_rarg4 - oop ckval (super_klass) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char* name, address* entry, ++ bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + -+ // check for pending exceptions -+#ifdef ASSERT -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ should_not_reach_here(); -+ __ bind(L); -+#endif // ASSERT -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elementscount ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass + ++ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); ++ RegSet wb_post_saved_regs = RegSet::of(count); + -+ // codeBlob framesize is in words (not VMRegImpl::slot_size) -+ RuntimeStub* stub = -+ RuntimeStub::new_runtime_stub(name, -+ &code, -+ frame_complete, -+ (framesize >> (LogBytesPerWord - LogBytesPerInt)), -+ oop_maps, false); -+ assert(stub != NULL, "create runtime stub fail!"); -+ return stub->entry_point(); -+ } ++ // Registers used as temps (x7, x9, x18 are save-on-entry) ++ const Register count_save = x19; // orig elementscount ++ const Register start_to = x18; // destination array start address ++ const Register copied_oop = x7; // actual oop copied ++ const Register r9_klass = x9; // oop._klass + -+#ifdef COMPILER2 -+ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to 
arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. + -+ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, -+ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, r9_klass, count_save); + -+ RegSet _toSave; -+ bool _squaring; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+ public: -+ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) -+ : MacroAssembler(as->code()), _squaring(squaring) { ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Register allocation ++ // Caller of this entry point must set up the argument registers. ++ if (entry != NULL) { ++ *entry = __ pc(); ++ BLOCK_COMMENT("Entry:"); ++ } + -+ Register reg = c_rarg0; -+ Pa_base = reg; // Argument registers -+ if (squaring) { -+ Pb_base = Pa_base; -+ } else { -+ Pb_base = ++reg; -+ } -+ Pn_base = ++reg; -+ Rlen= ++reg; -+ inv = ++reg; -+ Pm_base = ++reg; ++ // Empty array: Nothing to do ++ __ beqz(count, L_done); + -+ // Working registers: -+ Ra = ++reg; // The current digit of a, b, n, and m. -+ Rb = ++reg; -+ Rm = ++reg; -+ Rn = ++reg; ++ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); + -+ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. -+ Pb = ++reg; -+ Pm = ++reg; -+ Pn = ++reg; ++#ifdef ASSERT ++ BLOCK_COMMENT("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT + -+ tmp0 = ++reg; // Three registers which form a -+ tmp1 = ++reg; // triple-precision accumuator. -+ tmp2 = ++reg; ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } + -+ Ri = x6; // Inner and outer loop indexes. -+ Rj = x7; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); + -+ Rhi_ab = x28; // Product registers: low and high parts -+ Rlo_ab = x29; // of a*b and m*n. -+ Rhi_mn = x30; -+ Rlo_mn = x31; ++ // save the original count ++ __ mv(count_save, count); + -+ // x18 and up are callee-saved. -+ _toSave = RegSet::range(x18, reg) + Pm_base; -+ } ++ // Copy from low to high addresses ++ __ mv(start_to, to); // Save destination array start address ++ __ j(L_load_element); + -+ private: -+ void save_regs() { -+ push_reg(_toSave, sp); -+ } ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for count to 0 do ++ // copied_oop = load_heap_oop(from++) ++ // ... generate_type_check ... 
++ // store_heap_oop(to++, copied_oop) ++ // end + -+ void restore_regs() { -+ pop_reg(_toSave, sp); -+ } ++ __ align(OptoLoopAlignment); + -+ template -+ void unroll_2(Register count, T block) { -+ Label loop, end, odd; -+ beqz(count, end); -+ andi(t0, count, 0x1); -+ bnez(t0, odd); -+ align(16); -+ bind(loop); -+ (this->*block)(); -+ bind(odd); -+ (this->*block)(); -+ addi(count, count, -2); -+ bgtz(count, loop); -+ bind(end); -+ } ++ __ BIND(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop ++ __ add(to, to, UseCompressedOops ? 4 : 8); ++ __ sub(count, count, 1); ++ __ beqz(count, L_do_card_marks); + -+ template -+ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { -+ Label loop, end, odd; -+ beqz(count, end); -+ andi(tmp, count, 0x1); -+ bnez(tmp, odd); -+ align(16); -+ bind(loop); -+ (this->*block)(d, s, tmp); -+ bind(odd); -+ (this->*block)(d, s, tmp); -+ addi(count, count, -2); -+ bgtz(count, loop); -+ bind(end); -+ } ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop ++ __ add(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); + -+ void pre1(RegisterOrConstant i) { -+ block_comment("pre1"); -+ // Pa = Pa_base; -+ // Pb = Pb_base + i; -+ // Pm = Pm_base; -+ // Pn = Pn_base + i; -+ // Ra = *Pa; -+ // Rb = *Pb; -+ // Rm = *Pm; -+ // Rn = *Pn; -+ if (i.is_register()) { -+ slli(t0, i.as_register(), LogBytesPerWord); -+ } else { -+ mv(t0, i.as_constant()); -+ slli(t0, t0, LogBytesPerWord); -+ } ++ __ load_klass(r9_klass, copied_oop);// query the object klass ++ generate_type_check(r9_klass, ckoff, ckval, L_store_element); ++ // ======== end loop ======== + -+ mv(Pa, Pa_base); -+ add(Pb, Pb_base, t0); -+ mv(Pm, Pm_base); -+ add(Pn, Pn_base, t0); ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. + -+ ld(Ra, Address(Pa)); -+ ld(Rb, Address(Pb)); -+ ld(Rm, Address(Pm)); -+ ld(Rn, Address(Pn)); ++ __ sub(count, count_save, count); // K = partially copied oop count ++ __ xori(count, count, -1); // report (-1^K) to caller ++ __ beqz(count, L_done_pop); + -+ // Zero the m*n result. -+ mv(Rhi_mn, zr); -+ mv(Rlo_mn, zr); -+ } ++ __ BIND(L_do_card_marks); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); + -+ // The core multiply-accumulate step of a Montgomery -+ // multiplication. The idea is to schedule operations as a -+ // pipeline so that instructions with long latencies (loads and -+ // multiplies) have time to complete before their results are -+ // used. This most benefits in-order implementations of the -+ // architecture but out-of-order ones also benefit. -+ void step() { -+ block_comment("step"); -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ addi(Pa, Pa, wordSize); -+ ld(Ra, Address(Pa)); -+ addi(Pb, Pb, -wordSize); -+ ld(Rb, Address(Pb)); -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the -+ // previous iteration. 
-+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ mulhu(Rhi_mn, Rm, Rn); -+ mul(Rlo_mn, Rm, Rn); -+ addi(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ addi(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } ++ __ bind(L_done_pop); ++ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + -+ void post1() { -+ block_comment("post1"); ++ __ bind(L_done); ++ __ mv(x10, count); ++ __ leave(); ++ __ ret(); + -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ return start; ++ } + -+ // *Pm = Rm = tmp0 * inv; -+ mul(Rm, tmp0, inv); -+ sd(Rm, Address(Pm)); ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ BLOCK_COMMENT("arraycopy_range_checks:"); + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ mulhu(Rhi_mn, Rm, Rn); ++ assert_different_registers(t0, temp); + -+#ifndef PRODUCT -+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); -+ { -+ mul(Rlo_mn, Rm, Rn); -+ add(Rlo_mn, tmp0, Rlo_mn); -+ Label ok; -+ beqz(Rlo_mn, ok); -+ stop("broken Montgomery multiply"); -+ bind(ok); -+ } -+#endif -+ // We have very carefully set things up so that -+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate -+ // the lower half of Rm * Rn because we know the result already: -+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff -+ // tmp0 != 0. So, rather than do a mul and an cad we just set -+ // the carry flag iff tmp0 is nonzero. -+ // -+ // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); -+ addi(t0, tmp0, -1); -+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero -+ cadc(tmp0, tmp1, Rhi_mn, t0); -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ // if [src_pos + length > arrayOop(src)->length()] then FAIL ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, src_pos); ++ __ bgtu(temp, t0, L_failed); + -+ void pre2(Register i, Register len) { -+ block_comment("pre2"); -+ // Pa = Pa_base + i-len; -+ // Pb = Pb_base + len; -+ // Pm = Pm_base + i-len; -+ // Pn = Pn_base + len; ++ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, dst_pos); ++ __ bgtu(temp, t0, L_failed); + -+ sub(Rj, i, len); -+ // Rj == i-len ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. 
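The zero_extend calls just below implement this clean-up. As a host-side illustration (plain C++, not the stub's register-level code) of why a 32-bit position must be zero-extended before it is scaled into a 64-bit byte offset:

    #include <cassert>
    #include <cstdint>

    // The hardware register is 64 bits wide, but src_pos/dst_pos are jints;
    // any stale upper bits must be dropped before the shift, which is exactly
    // what zero_extend(reg, reg, 32) achieves.
    static uint64_t scaled_offset(uint64_t raw_reg, int log2_elem_size) {
      uint64_t pos = raw_reg & 0xffffffffULL;   // keep only the low 32 bits
      return pos << log2_elem_size;             // byte offset into the array body
    }

    int main() {
      uint64_t dirty = 0xdeadbeef00000004ULL;   // jint value 4, garbage above it
      assert(scaled_offset(dirty, 3) == (4u << 3));
      return 0;
    }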
++ __ zero_extend(src_pos, src_pos, 32); ++ __ zero_extend(dst_pos, dst_pos, 32); + -+ // Ra as temp register -+ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord); -+ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord); -+ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord); -+ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord); ++ BLOCK_COMMENT("arraycopy_range_checks done"); ++ } + -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ add(Pa, Pa, wordSize); -+ ld(Ra, Address(Pa)); -+ add(Pb, Pb, -wordSize); -+ ld(Rb, Address(Pb)); -+ add(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ add(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. ++ // ++ address generate_unsafe_copy(const char* name, ++ address byte_copy_entry, ++ address short_copy_entry, ++ address int_copy_entry, ++ address long_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && long_copy_entry != NULL); ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + -+ mv(Rhi_mn, zr); -+ mv(Rlo_mn, zr); -+ } ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ void post2(Register i, Register len) { -+ block_comment("post2"); -+ sub(Rj, i, len); ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + -+ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part ++ __ orr(t0, s, d); ++ __ orr(t0, t0, count); + -+ // As soon as we know the least significant digit of our result, -+ // store it. -+ // Pm_base[i-len] = tmp0; -+ // Rj as temp register -+ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord); -+ sd(tmp0, Address(Rj)); ++ __ andi(t0, t0, BytesPerLong - 1); ++ __ beqz(t0, L_long_aligned); ++ __ andi(t0, t0, BytesPerInt - 1); ++ __ beqz(t0, L_int_aligned); ++ __ andi(t0, t0, 1); ++ __ beqz(t0, L_short_aligned); ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ __ BIND(L_short_aligned); ++ __ srli(count, count, LogBytesPerShort); // size => short_count ++ __ j(RuntimeAddress(short_copy_entry)); ++ __ BIND(L_int_aligned); ++ __ srli(count, count, LogBytesPerInt); // size => int_count ++ __ j(RuntimeAddress(int_copy_entry)); ++ __ BIND(L_long_aligned); ++ __ srli(count, count, LogBytesPerLong); // size => long_count ++ __ j(RuntimeAddress(long_copy_entry)); + -+ // A carry in tmp0 after Montgomery multiplication means that we -+ // should subtract multiples of n from our result in m. We'll -+ // keep doing that until there is no carry. 
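A host-side sketch of the carry-normalization just described (semantics only, multi-precision little-endian words; it does not model the register allocation of the normalize() routine that follows):

    #include <cstdint>
    #include <vector>

    // The true value is m + carry * B^len with B = 2^64. Repeatedly subtract
    // the modulus n; each borrow out of the top word consumes one unit of carry.
    static void normalize(std::vector<uint64_t>& m,
                          const std::vector<uint64_t>& n,
                          uint64_t& carry) {
      while (carry != 0) {
        unsigned borrow = 0;
        for (size_t i = 0; i < m.size(); i++) {
          uint64_t mi = m[i], ni = n[i];
          m[i] = mi - ni - borrow;
          borrow = ((mi < ni) || (mi == ni && borrow)) ? 1 : 0;
        }
        carry -= borrow;
      }
    }

    int main() {
      std::vector<uint64_t> m = {5, 0};   // low words of the result
      std::vector<uint64_t> n = {7, 0};   // modulus words, little-endian
      uint64_t carry = 1;                 // one unit of overflow to shed
      normalize(m, n, carry);
      return carry == 0 ? 0 : 1;
    }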
-+ void normalize(Register len) { -+ block_comment("normalize"); -+ // while (tmp0) -+ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); -+ Label loop, post, again; -+ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now -+ beqz(tmp0, post); { -+ bind(again); { -+ mv(i, zr); -+ mv(cnt, len); -+ slli(Rn, i, LogBytesPerWord); -+ add(Rm, Pm_base, Rn); -+ ld(Rm, Address(Rm)); -+ add(Rn, Pn_base, Rn); -+ ld(Rn, Address(Rn)); -+ mv(t0, 1); // set carry flag, i.e. no borrow -+ align(16); -+ bind(loop); { -+ notr(Rn, Rn); -+ add(Rm, Rm, t0); -+ add(Rm, Rm, Rn); -+ sltu(t0, Rm, Rn); -+ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register -+ sd(Rm, Address(Rn)); -+ add(i, i, 1); -+ slli(Rn, i, LogBytesPerWord); -+ add(Rm, Pm_base, Rn); -+ ld(Rm, Address(Rm)); -+ add(Rn, Pn_base, Rn); -+ ld(Rn, Address(Rn)); -+ sub(cnt, cnt, 1); -+ } bnez(cnt, loop); -+ addi(tmp0, tmp0, -1); -+ add(tmp0, tmp0, t0); -+ } bnez(tmp0, again); -+ } bind(post); -+ } ++ return start; ++ } + -+ // Move memory at s to d, reversing words. -+ // Increments d to end of copied memory -+ // Destroys tmp1, tmp2 -+ // Preserves len -+ // Leaves s pointing to the address which was in d at start -+ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { -+ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // c_rarg4 - element count (32-bits) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char* name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && oop_copy_entry != NULL && ++ long_copy_entry != NULL && checkcast_copy_entry != NULL); ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + -+ shadd(s, len, s, tmp1, LogBytesPerWord); -+ mv(tmp1, len); -+ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); -+ slli(tmp1, len, LogBytesPerWord); -+ sub(s, d, tmp1); -+ } -+ // [63...0] -> [31...0][63...32] -+ void reverse1(Register d, Register s, Register tmp) { -+ addi(s, s, -wordSize); -+ ld(tmp, Address(s)); -+ ror_imm(tmp, tmp, 32, t0); -+ sd(tmp, Address(d)); -+ addi(d, d, wordSize); -+ } ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; + -+ void step_squaring() { -+ // An extra ACC -+ step(); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; + -+ void last_squaring(Register i) { -+ Label dont; -+ // if ((i & 1) == 0) { -+ andi(t0, i, 0x1); -+ bnez(t0, dont); { -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } bind(dont); -+ } ++ __ align(CodeEntryAlignment); + -+ void extra_step_squaring() { -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ StubCodeMark mark(this, 
"StubRoutines", name); + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ mulhu(Rhi_mn, Rm, Rn); -+ mul(Rlo_mn, Rm, Rn); -+ addi(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ addi(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); -+ } ++ address start = __ pc(); + ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ void post1_squaring() { -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + -+ // *Pm = Rm = tmp0 * inv; -+ mul(Rm, tmp0, inv); -+ sd(Rm, Address(Pm)); ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ mulhu(Rhi_mn, Rm, Rn); ++ // if [src == NULL] then return -1 ++ __ beqz(src, L_failed); + -+#ifndef PRODUCT -+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); -+ { -+ mul(Rlo_mn, Rm, Rn); -+ add(Rlo_mn, tmp0, Rlo_mn); -+ Label ok; -+ beqz(Rlo_mn, ok); { -+ stop("broken Montgomery multiply"); -+ } bind(ok); -+ } -+#endif -+ // We have very carefully set things up so that -+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate -+ // the lower half of Rm * Rn because we know the result already: -+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff -+ // tmp0 != 0. So, rather than do a mul and a cad we just set -+ // the carry flag iff tmp0 is nonzero. -+ // -+ // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); -+ addi(t0, tmp0, -1); -+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero -+ cadc(tmp0, tmp1, Rhi_mn, t0); -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ // if [src_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, src_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // use t0 as carry -+ void acc(Register Rhi, Register Rlo, -+ Register tmp0, Register tmp1, Register tmp2) { -+ cad(tmp0, tmp0, Rlo, t0); -+ cadc(tmp1, tmp1, Rhi, t0); -+ adc(tmp2, tmp2, zr, t0); -+ } ++ // if [dst == NULL] then return -1 ++ __ beqz(dst, L_failed); + -+ public: -+ /** -+ * Fast Montgomery multiplication. The derivation of the -+ * algorithm is in A Cryptographic Library for the Motorola -+ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. -+ * -+ * Arguments: -+ * -+ * Inputs for multiplication: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements b -+ * c_rarg2 - int array elements n (the modulus) -+ * c_rarg3 - int length -+ * c_rarg4 - int inv -+ * c_rarg5 - int array elements m (the result) -+ * -+ * Inputs for squaring: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements n (the modulus) -+ * c_rarg2 - int length -+ * c_rarg3 - int inv -+ * c_rarg4 - int array elements m (the result) -+ * -+ */ -+ address generate_multiply() { -+ Label argh, nothing; -+ bind(argh); -+ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ // if [dst_pos < 0] then return -1 ++ // i.e. 
sign bit set ++ __ andi(t0, dst_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ align(CodeEntryAlignment); -+ address entry = pc(); ++ // registers used as temp ++ const Register scratch_length = x28; // elements count to copy ++ const Register scratch_src_klass = x29; // array klass ++ const Register lh = x30; // layout helper + -+ beqz(Rlen, nothing); ++ // if [length < 0] then return -1 ++ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) ++ // i.e. sign bit set ++ __ andi(t0, scratch_length, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ enter(); ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(t0, dst); ++ __ beqz(t0, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif + -+ // Make room. -+ mv(Ra, 512); -+ bgt(Rlen, Ra, argh); -+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); -+ sub(Ra, sp, Ra); -+ andi(sp, Ra, -2 * wordSize); ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // + -+ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); + -+ { -+ // Copy input args, reversing as we go. We use Ra as a -+ // temporary variable. -+ reverse(Ra, Pa_base, Rlen, Ri, Rj); -+ if (!_squaring) -+ reverse(Ra, Pb_base, Rlen, Ri, Rj); -+ reverse(Ra, Pn_base, Rlen, Ri, Rj); -+ } ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(lh, Address(scratch_src_klass, lh_offset)); ++ __ mvw(t0, objArray_lh); ++ __ beq(lh, t0, L_objArray); + -+ // Push all call-saved registers and also Pm_base which we'll need -+ // at the end. -+ save_regs(); ++ // if [src->klass() != dst->klass()] then return -1 ++ __ load_klass(t1, dst); ++ __ bne(t1, scratch_src_klass, L_failed); + -+#ifndef PRODUCT -+ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); -+ { -+ ld(Rn, Address(Pn_base)); -+ mul(Rlo_mn, Rn, inv); -+ mv(t0, -1); -+ Label ok; -+ beq(Rlo_mn, t0, ok); -+ stop("broken inverse in Montgomery multiply"); -+ bind(ok); -+ } -+#endif ++ // if [src->is_Array() != NULL] then return -1 ++ // i.e. (lh >= 0) ++ __ andi(t0, lh, 1UL << 31); ++ __ beqz(t0, L_failed); + -+ mv(Pm_base, Ra); ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
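As a host-side illustration of the layout-helper fields sketched in the diagram earlier in this hunk (field positions follow that diagram; the sample values below are hypothetical, not taken from the patch):

    #include <cstdint>
    #include <cstdio>

    // Decode the 32-bit Klass layout helper for an array:
    // array_tag | header_size | element_type | log2_element_size.
    struct ArrayLayout {
      unsigned tag;                // 0x3 = typeArray, 0x2 = objArray
      unsigned header_size;        // array header size in bytes
      unsigned element_type;       // BasicType code of the elements
      unsigned log2_element_size;  // 0..3 for byte/short/int/long elements
    };

    static ArrayLayout decode_layout_helper(int32_t lh) {
      uint32_t u = (uint32_t)lh;
      ArrayLayout a;
      a.tag               = u >> 30;
      a.header_size       = (u >> 16) & 0xff;
      a.element_type      = (u >> 8)  & 0xff;
      a.log2_element_size =  u        & 0xff;   // only the low bits are used
      return a;
    }

    int main() {
      // Hypothetical helper for an int[]: typeArray tag, 16-byte header,
      // element type code 10 (T_INT), log2 element size 2.
      int32_t lh = (int32_t)((0x3u << 30) | (16u << 16) | (10u << 8) | 2u);
      ArrayLayout a = decode_layout_helper(lh);
      printf("tag=0x%x header=%u type=%u log2_elsize=%u\n",
             a.tag, a.header_size, a.element_type, a.log2_element_size);
      return 0;
    }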
++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert primitive array {"); ++ Label L; ++ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); ++ __ bge(lh, t1, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); ++ } ++#endif + -+ mv(tmp0, zr); -+ mv(tmp1, zr); -+ mv(tmp2, zr); ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ block_comment("for (int i = 0; i < len; i++) {"); -+ mv(Ri, zr); { -+ Label loop, end; -+ bge(Ri, Rlen, end); ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) ++ // + -+ bind(loop); -+ pre1(Ri); ++ const Register t0_offset = t0; // array offset ++ const Register x22_elsize = lh; // element size + -+ block_comment(" for (j = i; j; j--) {"); { -+ mv(Rj, Ri); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); -+ } block_comment(" } // j"); -+ -+ post1(); -+ addw(Ri, Ri, 1); -+ blt(Ri, Rlen, loop); -+ bind(end); -+ block_comment("} // i"); -+ } -+ -+ block_comment("for (int i = len; i < 2*len; i++) {"); -+ mv(Ri, Rlen); { -+ Label loop, end; -+ slli(Rj, Rlen, 1); // Rj as temp register -+ bge(Ri, Rj, end); -+ -+ bind(loop); -+ pre2(Ri, Rlen); ++ // Get array_header_in_bytes() ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; ++ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset + -+ block_comment(" for (j = len*2-i-1; j; j--) {"); { -+ slliw(Rj, Rlen, 1); -+ subw(Rj, Rj, Ri); -+ subw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); -+ } block_comment(" } // j"); ++ __ add(src, src, t0_offset); // src array offset ++ __ add(dst, dst, t0_offset); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); + -+ post2(Ri, Rlen); -+ addw(Ri, Ri, 1); -+ slli(Rj, Rlen, 1); -+ blt(Ri, Rj, loop); -+ bind(end); -+ } -+ block_comment("} // i"); ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count + ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. + -+ normalize(Rlen); ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + -+ mv(Ra, Pm_base); // Save Pm_base in Ra -+ restore_regs(); // Restore caller's Pm_base ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. 
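The hunk that follows implements this dispatch with andi/bnez tests; as a plain C++ sketch of the same bitwise binary search over log2(element size):

    #include <cstdio>

    // elsize is exact_log2 of the element size in bytes: 0, 1, 2 or 3.
    // Bit 1 separates {byte, short} from {int, long}; bit 0 then picks
    // within each pair, mirroring the two andi/bnez tests in the stub.
    static const char* pick_copy_loop(int elsize) {
      if (elsize & 2) {
        return (elsize & 1) ? "long_copy" : "int_copy";
      }
      return (elsize & 1) ? "short_copy" : "byte_copy";
    }

    int main() {
      for (int e = 0; e <= 3; e++) {
        printf("elsize %d -> %s\n", e, pick_copy_loop(e));
      }
      return 0;
    }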
++ __ BIND(L_copy_bytes); ++ __ andi(t0, x22_elsize, 2); ++ __ bnez(t0, L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_shorts); ++ __ add(from, src, src_pos); // src_addr ++ __ add(to, dst, dst_pos); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // Copy our result into caller's Pm_base -+ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ __ BIND(L_copy_shorts); ++ __ shadd(from, src_pos, src, t0, 1); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(short_copy_entry)); + -+ leave(); -+ bind(nothing); -+ ret(); ++ __ BIND(L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_longs); ++ __ shadd(from, src_pos, src, t0, 2); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(int_copy_entry)); + -+ return entry; ++ __ BIND(L_copy_longs); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize ++ __ addw(lh, lh, zr); ++ __ mvw(t0, LogBytesPerLong); ++ __ beq(x22_elsize, t0, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); + } ++#endif ++ __ shadd(from, src_pos, src, t0, 3); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(long_copy_entry)); + -+ /** -+ * -+ * Arguments: -+ * -+ * Inputs: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements n (the modulus) -+ * c_rarg2 - int length -+ * c_rarg3 - int inv -+ * c_rarg4 - int array elements m (the result) -+ * -+ */ -+ address generate_square() { -+ Label argh; -+ bind(argh); -+ stop("MontgomeryMultiply total_allocation must be <= 8192"); -+ -+ align(CodeEntryAlignment); -+ address entry = pc(); -+ -+ enter(); ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + -+ // Make room. -+ mv(Ra, 512); -+ bgt(Rlen, Ra, argh); -+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); -+ sub(Ra, sp, Ra); -+ andi(sp, Ra, -2 * wordSize); ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(t2, dst); ++ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality + -+ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ { -+ // Copy input args, reversing as we go. We use Ra as a -+ // temporary variable. -+ reverse(Ra, Pa_base, Rlen, Ri, Rj); -+ reverse(Ra, Pn_base, Rlen, Ri, Rj); -+ } ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, scratch_length, zr); // length ++ __ BIND(L_plain_copy); ++ __ j(RuntimeAddress(oop_copy_entry)); + -+ // Push all call-saved registers and also Pm_base which we'll need -+ // at the end. -+ save_regs(); ++ __ BIND(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. 
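The lwu/bne that follow perform that objArray re-check; after it, every copied element still goes through generate_type_check. A greatly simplified semantic model of that per-element check (HotSpot really uses super_check_offset and secondary supers; walking the super chain here only conveys the meaning):

    #include <cstddef>

    // Every oop stored into a T[] must be an instance of T (or a subtype).
    struct KlassModel {
      const KlassModel* super;   // superclass, or NULL for java.lang.Object
    };

    static bool is_subtype_of(const KlassModel* k, const KlassModel* target) {
      for (; k != NULL; k = k->super) {
        if (k == target) {
          return true;
        }
      }
      return false;
    }

    int main() {
      KlassModel object  = { NULL };
      KlassModel number  = { &object };
      KlassModel integer = { &number };
      // Copying Integer elements into a Number[] succeeds; the reverse fails.
      return (is_subtype_of(&integer, &number) &&
              !is_subtype_of(&number, &integer)) ? 0 : 1;
    }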
++ __ lwu(t0, Address(t2, lh_offset)); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, L_failed); + -+ mv(Pm_base, Ra); ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t2, L_failed); + -+ mv(tmp0, zr); -+ mv(tmp1, zr); -+ mv(tmp2, zr); ++ __ load_klass(dst_klass, dst); // reload + -+ block_comment("for (int i = 0; i < len; i++) {"); -+ mv(Ri, zr); { -+ Label loop, end; -+ bind(loop); -+ bge(Ri, Rlen, end); ++ // Marshal the base address arguments now, freeing registers. ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, length, zr); // length (reloaded) ++ const Register sco_temp = c_rarg3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, ++ dst_klass, scratch_src_klass); + -+ pre1(Ri); ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); + -+ block_comment("for (j = (i+1)/2; j; j--) {"); { -+ addi(Rj, Ri, 1); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); -+ } block_comment(" } // j"); ++ // Smashes t0, t1 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); + -+ last_squaring(Ri); ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld(dst_klass, Address(dst_klass, ek_offset)); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); + -+ block_comment(" for (j = i/2; j; j--) {"); { -+ srliw(Rj, Ri, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); -+ } block_comment(" } // j"); ++ // the checkcast_copy loop needs two extra arguments: ++ assert(c_rarg3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_copy_entry. ++ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass ++ __ j(RuntimeAddress(checkcast_copy_entry)); ++ } + -+ post1_squaring(); -+ addi(Ri, Ri, 1); -+ blt(Ri, Rlen, loop); ++ __ BIND(L_failed); ++ __ li(x10, -1); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + -+ bind(end); -+ block_comment("} // i"); -+ } ++ return start; ++ } + -+ block_comment("for (int i = len; i < 2*len; i++) {"); -+ mv(Ri, Rlen); { -+ Label loop, end; -+ bind(loop); -+ slli(Rj, Rlen, 1); -+ bge(Ri, Rj, end); ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char* name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+ pre2(Ri, Rlen); ++ BLOCK_COMMENT("Entry:"); + -+ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { -+ slli(Rj, Rlen, 1); -+ sub(Rj, Rj, Ri); -+ sub(Rj, Rj, 1); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); -+ } block_comment(" } // j"); ++ const Register to = c_rarg0; // source array address ++ const Register value = c_rarg1; // value ++ const Register count = c_rarg2; // elements count + -+ last_squaring(Ri); ++ const Register bz_base = x28; // base for block_zero routine ++ const Register cnt_words = x29; // temp register ++ const Register tmp_reg = t1; + -+ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { -+ slli(Rj, Rlen, 1); -+ sub(Rj, Rj, Ri); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); -+ } block_comment(" } // j"); ++ __ enter(); + -+ post2(Ri, Rlen); -+ addi(Ri, Ri, 1); -+ slli(t0, Rlen, 1); -+ blt(Ri, t0, loop); ++ Label L_fill_elements, L_exit1; + -+ bind(end); -+ block_comment("} // i"); -+ } ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; + -+ normalize(Rlen); ++ // Zero extend value ++ // 8 bit -> 16 bit ++ __ andi(value, value, 0xff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 8); ++ __ orr(value, value, tmp_reg); + -+ mv(Ra, Pm_base); // Save Pm_base in Ra -+ restore_regs(); // Restore caller's Pm_base ++ // 16 bit -> 32 bit ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); + -+ // Copy our result into caller's Pm_base -+ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ // Zero extend value ++ // 16 bit -> 32 bit ++ __ andi(value, value, 0xffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); + -+ leave(); -+ ret(); ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; + -+ return entry; ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); + } -+ }; -+#endif // COMPILER2 -+ -+ // Initialization -+ void generate_initial() { -+ // Generate initial stubs and initializes the entry points -+ -+ // entry points that exist in all platforms Note: This is code -+ // that could be shared among different platforms - however the -+ // benefit seems to be smaller than the disadvantage of having a -+ // much more complicated generator structure. See also comment in -+ // stubRoutines.hpp. -+ -+ StubRoutines::_forward_exception_entry = generate_forward_exception(); -+ -+ StubRoutines::_call_stub_entry = -+ generate_call_stub(StubRoutines::_call_stub_return_address); -+ -+ // is referenced by megamorphic call -+ StubRoutines::_catch_exception_entry = generate_catch_exception(); -+ -+ // Build this early so it's available for the interpreter. 
-+ StubRoutines::_throw_StackOverflowError_entry = -+ generate_throw_exception("StackOverflowError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime::throw_StackOverflowError)); -+ StubRoutines::_throw_delayed_StackOverflowError_entry = -+ generate_throw_exception("delayed StackOverflowError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime::throw_delayed_StackOverflowError)); -+ } + -+ void generate_all() { -+ // support for verify_oop (must happen after universe_init) -+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); -+ StubRoutines::_throw_AbstractMethodError_entry = -+ generate_throw_exception("AbstractMethodError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_AbstractMethodError)); ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(t0, to, 1); ++ __ beqz(t0, L_skip_align1); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ addiw(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(t0, to, 2); ++ __ beqz(t0, L_skip_align2); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ addiw(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(t0, to, 4); ++ __ beqz(t0, L_skip_align4); ++ __ sw(value, Address(to, 0)); ++ __ addi(to, to, 4); ++ __ addiw(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+ StubRoutines::_throw_IncompatibleClassChangeError_entry = -+ generate_throw_exception("IncompatibleClassChangeError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_IncompatibleClassChangeError)); ++ // ++ // Fill large chunks ++ // ++ __ srliw(cnt_words, count, 3 - shift); // number of words + -+ StubRoutines::_throw_NullPointerException_at_call_entry = -+ generate_throw_exception("NullPointerException at call throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_NullPointerException_at_call)); -+ // arraycopy stubs used by compilers -+ generate_arraycopy_stubs(); ++ // 32 bit -> 64 bit ++ __ andi(value, value, 0xffffffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 32); ++ __ orr(value, value, tmp_reg); + -+#ifdef COMPILER2 -+ if (UseMulAddIntrinsic) { -+ StubRoutines::_mulAdd = generate_mulAdd(); ++ __ slli(tmp_reg, cnt_words, 3 - shift); ++ __ subw(count, count, tmp_reg); ++ { ++ __ fill_words(to, cnt_words, value); + } + -+ if (UseMultiplyToLenIntrinsic) { -+ StubRoutines::_multiplyToLen = generate_multiplyToLen(); ++ // Remaining count is less than 8 bytes. Fill it by a single store. ++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ __ beqz(count, L_exit1); ++ __ shadd(to, count, to, tmp_reg, shift); // points to the end ++ __ sd(value, Address(to, -8)); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ ret(); + } + -+ if (UseSquareToLenIntrinsic) { -+ StubRoutines::_squareToLen = generate_squareToLen(); ++ // Handle copies less than 8 bytes. 
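Earlier in this hunk the fill value was widened to a full 64-bit pattern by repeated shift-and-or before fill_words; a host-side sketch of that widening (the sample values are hypothetical):

    #include <cassert>
    #include <cstdint>

    // Replicate an 8-, 16- or 32-bit fill value across all 64 bits so the
    // main loop can store whole double-words. log2_size: 0 = byte, 1 = short,
    // 2 = int, matching the 'shift' selected in generate_fill.
    static uint64_t splat(uint64_t value, int log2_size) {
      if (log2_size == 0) {
        value &= 0xff;
        value |= value << 8;        // 8 bit  -> 16 bit
        value |= value << 16;       // 16 bit -> 32 bit
      } else if (log2_size == 1) {
        value &= 0xffff;
        value |= value << 16;       // 16 bit -> 32 bit
      } else {
        value &= 0xffffffffULL;
      }
      value |= value << 32;         // 32 bit -> 64 bit, done for every type
      return value;
    }

    int main() {
      assert(splat(0xab, 0)       == 0xababababababababULL);
      assert(splat(0x1234, 1)     == 0x1234123412341234ULL);
      assert(splat(0xdeadbeef, 2) == 0xdeadbeefdeadbeefULL);
      return 0;
    }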
++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_2); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 4); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_SHORT: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_INT: ++ __ beqz(count, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ default: ShouldNotReachHere(); + } ++ __ bind(L_exit2); ++ __ leave(); ++ __ ret(); ++ return start; ++ } + -+ generate_compare_long_strings(); ++ void generate_arraycopy_stubs() { ++ address entry = NULL; ++ address entry_jbyte_arraycopy = NULL; ++ address entry_jshort_arraycopy = NULL; ++ address entry_jint_arraycopy = NULL; ++ address entry_oop_arraycopy = NULL; ++ address entry_jlong_arraycopy = NULL; ++ address entry_checkcast_arraycopy = NULL; + -+ generate_string_indexof_stubs(); ++ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); ++ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); + -+ if (UseMontgomeryMultiplyIntrinsic) { -+ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); -+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); -+ StubRoutines::_montgomeryMultiply = g.generate_multiply(); -+ } ++ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + -+ if (UseMontgomerySquareIntrinsic) { -+ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); -+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); -+ StubRoutines::_montgomerySquare = g.generate_square(); -+ } -+#endif // COMPILER2 -+ // Safefetch stubs. 
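Both the removed and the new generators register SafeFetch stubs at this point (the new generate_safefetch appears further down with the same int SafeFetch32(int* adr, int errValue) signature). A conceptual host-side model of what these stubs provide; the real stub takes no flag argument, the signal handler makes the decision by resuming at continuation_pc:

    #include <cstdint>

    // SafeFetch32 / SafeFetchN semantics: return *adr, or errValue if reading
    // adr would fault. Used by the VM to probe possibly-unmapped memory.
    static int safe_fetch32_model(const int* adr, int errValue, bool access_would_fault) {
      return access_would_fault ? errValue : *adr;
    }

    int main() {
      int v = 42;
      bool ok = safe_fetch32_model(&v, -1, false) == 42 &&
                safe_fetch32_model(nullptr, -1, true) == -1;
      return ok ? 0 : 1;
    }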
-+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, -+ &StubRoutines::_safefetch32_fault_pc, -+ &StubRoutines::_safefetch32_continuation_pc); -+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, -+ &StubRoutines::_safefetchN_fault_pc, -+ &StubRoutines::_safefetchN_continuation_pc); ++ //*** jbyte ++ // Always need aligned and unaligned versions ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, ++ "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, ++ &entry_jbyte_arraycopy, ++ "jbyte_arraycopy"); ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, ++ "arrayof_jbyte_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, ++ "arrayof_jbyte_arraycopy"); + -+ StubRoutines::riscv::set_completed(); -+ } ++ //*** jshort ++ // Always need aligned and unaligned versions ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, ++ "jshort_disjoint_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, ++ &entry_jshort_arraycopy, ++ "jshort_arraycopy"); ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, ++ "arrayof_jshort_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, ++ "arrayof_jshort_arraycopy"); + -+ public: -+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { -+ if (all) { -+ generate_all(); -+ } else { -+ generate_initial(); ++ //*** jint ++ // Aligned versions ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, ++ "arrayof_jint_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, ++ "arrayof_jint_arraycopy"); ++ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. ++ // entry_jint_arraycopy always points to the unaligned version ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, ++ "jint_disjoint_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, ++ &entry_jint_arraycopy, ++ "jint_arraycopy"); ++ ++ //*** jlong ++ // It is always aligned ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, ++ "arrayof_jlong_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, ++ "arrayof_jlong_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; ++ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; ++ ++ //*** oops ++ { ++ // With compressed oops we need unaligned versions; notice that ++ // we overwrite entry_oop_arraycopy. 
++ bool aligned = !UseCompressedOops; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", ++ /*dest_uninitialized*/false); ++ StubRoutines::_arrayof_oop_arraycopy ++ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", ++ /*dest_uninitialized*/false); ++ // Aligned versions without pre-barriers ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ StubRoutines::_arrayof_oop_arraycopy_uninit ++ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", ++ /*dest_uninitialized*/true); + } -+ } + -+ ~StubGenerator() {} -+}; // end class declaration ++ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; ++ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; ++ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; + -+void StubGenerator_generate(CodeBuffer* code, bool all) { -+ StubGenerator g(code, all); -+} -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -new file mode 100644 -index 000000000..633108b95 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, ++ /*dest_uninitialized*/true); + -+#include "precompiled.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/globalDefinitions.hpp" + -+// Implementation of the platform-specific part of StubRoutines - for -+// a description of how to extend it, see the stubRoutines.hpp file. 
++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_jlong_arraycopy); + -+address StubRoutines::riscv::_get_previous_fp_entry = NULL; -+address StubRoutines::riscv::_get_previous_sp_entry = NULL; ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_oop_arraycopy, ++ entry_jlong_arraycopy, ++ entry_checkcast_arraycopy); + -+address StubRoutines::riscv::_f2i_fixup = NULL; -+address StubRoutines::riscv::_f2l_fixup = NULL; -+address StubRoutines::riscv::_d2i_fixup = NULL; -+address StubRoutines::riscv::_d2l_fixup = NULL; -+address StubRoutines::riscv::_float_sign_mask = NULL; -+address StubRoutines::riscv::_float_sign_flip = NULL; -+address StubRoutines::riscv::_double_sign_mask = NULL; -+address StubRoutines::riscv::_double_sign_flip = NULL; -+address StubRoutines::riscv::_zero_blocks = NULL; -+address StubRoutines::riscv::_has_negatives = NULL; -+address StubRoutines::riscv::_has_negatives_long = NULL; -+address StubRoutines::riscv::_compare_long_string_LL = NULL; -+address StubRoutines::riscv::_compare_long_string_UU = NULL; -+address StubRoutines::riscv::_compare_long_string_LU = NULL; -+address StubRoutines::riscv::_compare_long_string_UL = NULL; -+address StubRoutines::riscv::_string_indexof_linear_ll = NULL; -+address StubRoutines::riscv::_string_indexof_linear_uu = NULL; -+address StubRoutines::riscv::_string_indexof_linear_ul = NULL; -+address StubRoutines::riscv::_large_byte_array_inflate = NULL; ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } + -+bool StubRoutines::riscv::_completed = false; -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -new file mode 100644 -index 000000000..8aa81980e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Safefetch stubs. ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue) ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) ++ // ++ // arguments: ++ // c_rarg0 = adr ++ // c_rarg1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); ++ StubCodeMark mark(this, "StubRoutines", name); + -+#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP -+#define CPU_RISCV_STUBROUTINES_RISCV_HPP ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); + -+// This file holds the platform specific parts of the StubRoutines -+// definition. See stubRoutines.hpp for a description on how to -+// extend it. ++ // Load *adr into c_rarg1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ case 8: ++ // int64_t ++ __ ld(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } + -+static bool returns_to_call_stub(address return_pc) { -+ return return_pc == _call_stub_return_address; -+} ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ mv(x10, c_rarg1); ++ __ ret(); ++ } + -+enum platform_dependent_constants { -+ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) -+ code_size2 = 36000 // simply increase if too small (assembler will crash if too small) -+}; ++ // code for comparing 16 bytes of strings with same encoding ++ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; ++ __ ld(tmp5, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ ld(cnt1, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ bnez(tmp4, DIFF1); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ bnez(tmp4, DIFF2); ++ } + -+class riscv { -+ friend class StubGenerator; ++ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, ++ Label &DIFF2) { ++ const Register strU = x12, curU = x7, strL = x29, tmp = x30; ++ __ ld(tmpL, Address(strL)); ++ __ addi(strL, strL, 8); ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, 8); ++ __ inflate_lo32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, curU, t0); ++ __ bnez(tmp, DIFF2); + -+ private: -+ static address _get_previous_fp_entry; -+ static address _get_previous_sp_entry; ++ __ ld(curU, Address(strU)); ++ __ addi(strU, strU, 8); ++ __ inflate_hi32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, tmpU, t0); ++ __ bnez(tmp, DIFF1); ++ } + -+ static address _f2i_fixup; -+ static address _f2l_fixup; -+ static address _d2i_fixup; -+ static address _d2l_fixup; ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ address generate_compare_long_string_different_encoding(bool isLU) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, ++ DONE, CALCULATE_DIFFERENCE; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); + -+ static address _float_sign_mask; -+ static address _float_sign_flip; -+ static address _double_sign_mask; -+ static address _double_sign_flip; ++ // cnt2 == amount of characters left to compare ++ // Check already loaded first 4 symbols ++ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); ++ __ mv(isLU ? tmp1 : tmp2, tmp3); ++ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); ++ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. ++ __ push_reg(spilled_regs, sp); + -+ static address _zero_blocks; ++ if (isLU) { ++ __ add(str1, str1, cnt2); ++ __ shadd(str2, cnt2, str2, t0, 1); ++ } else { ++ __ shadd(str1, cnt2, str1, t0, 1); ++ __ add(str2, str2, cnt2); ++ } ++ __ xorr(tmp3, tmp1, tmp2); ++ __ mv(tmp5, tmp2); ++ __ bnez(tmp3, CALCULATE_DIFFERENCE); + -+ static address _has_negatives; -+ static address _has_negatives_long; -+ static address _compare_long_string_LL; -+ static address _compare_long_string_LU; -+ static address _compare_long_string_UL; -+ static address _compare_long_string_UU; -+ static address _string_indexof_linear_ll; -+ static address _string_indexof_linear_uu; -+ static address _string_indexof_linear_ul; -+ static address _large_byte_array_inflate; -+ static bool _completed; ++ Register strU = isLU ? str2 : str1, ++ strL = isLU ? str1 : str2, ++ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison ++ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison + -+ public: ++ __ sub(tmp2, strL, cnt2); // strL pointer to load from ++ __ slli(t0, cnt2, 1); ++ __ sub(cnt1, strU, t0); // strU pointer to load from + -+ static address get_previous_fp_entry() -+ { -+ return _get_previous_fp_entry; -+ } ++ __ ld(tmp4, Address(cnt1)); ++ __ addi(cnt1, cnt1, 8); ++ __ beqz(cnt2, LOAD_LAST); // no characters left except last load ++ __ sub(cnt2, cnt2, 16); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); // smaller loop ++ __ sub(cnt2, cnt2, 16); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ addi(t0, cnt2, 16); ++ __ beqz(t0, LOAD_LAST); ++ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) ++ // Address of 8 bytes before last 4 characters in UTF-16 string ++ __ shadd(cnt1, cnt2, cnt1, t0, 1); ++ // Address of 16 bytes before last 4 characters in Latin1 string ++ __ add(tmp2, tmp2, cnt2); ++ __ ld(tmp4, Address(cnt1, -8)); ++ // last 16 characters before last load ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ j(LOAD_LAST); ++ __ bind(DIFF2); ++ __ mv(tmpU, tmp4); ++ __ bind(DIFF1); ++ __ mv(tmpL, t0); ++ __ j(CALCULATE_DIFFERENCE); ++ __ bind(LOAD_LAST); ++ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. 
++ // No need to load it again ++ __ mv(tmpU, tmp4); ++ __ ld(tmpL, Address(strL)); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ beqz(tmp3, DONE); + -+ static address get_previous_sp_entry() -+ { -+ return _get_previous_sp_entry; ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(CALCULATE_DIFFERENCE); ++ __ ctzc_bit(tmp4, tmp3); ++ __ srl(tmp1, tmp1, tmp4); ++ __ srl(tmp5, tmp5, tmp4); ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ sub(result, tmp1, tmp5); ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } + -+ static address f2i_fixup() -+ { -+ return _f2i_fixup; -+ } ++ address generate_method_entry_barrier() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); + -+ static address f2l_fixup() -+ { -+ return _f2l_fixup; -+ } ++ Label deoptimize_label; + -+ static address d2i_fixup() -+ { -+ return _d2i_fixup; -+ } ++ address start = __ pc(); + -+ static address d2l_fixup() -+ { -+ return _d2l_fixup; -+ } ++ __ set_last_Java_frame(sp, fp, ra, t0); + -+ static address float_sign_mask() -+ { -+ return _float_sign_mask; -+ } ++ __ enter(); ++ __ add(t1, sp, wordSize); + -+ static address float_sign_flip() -+ { -+ return _float_sign_flip; -+ } ++ __ sub(sp, sp, 4 * wordSize); + -+ static address double_sign_mask() -+ { -+ return _double_sign_mask; -+ } ++ __ push_call_clobbered_registers(); + -+ static address double_sign_flip() -+ { -+ return _double_sign_flip; -+ } ++ __ mv(c_rarg0, t1); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); + -+ static address zero_blocks() { -+ return _zero_blocks; -+ } ++ __ reset_last_Java_frame(true); + -+ static address has_negatives() { -+ return _has_negatives; -+ } ++ __ mv(t0, x10); + -+ static address has_negatives_long() { -+ return _has_negatives_long; -+ } ++ __ pop_call_clobbered_registers(); + -+ static address compare_long_string_LL() { -+ return _compare_long_string_LL; -+ } ++ __ bnez(t0, deoptimize_label); + -+ static address compare_long_string_LU() { -+ return _compare_long_string_LU; -+ } ++ __ leave(); ++ __ ret(); + -+ static address compare_long_string_UL() { -+ return _compare_long_string_UL; -+ } ++ __ BIND(deoptimize_label); + -+ static address compare_long_string_UU() { -+ return _compare_long_string_UU; -+ } ++ __ ld(t0, Address(sp, 0)); ++ __ ld(fp, Address(sp, wordSize)); ++ __ ld(ra, Address(sp, wordSize * 2)); ++ __ ld(t1, Address(sp, wordSize * 3)); + -+ static address string_indexof_linear_ul() { -+ return _string_indexof_linear_ul; -+ } ++ __ mv(sp, t0); ++ __ jr(t1); + -+ static address string_indexof_linear_ll() { -+ return _string_indexof_linear_ll; ++ return start; + } + -+ static address string_indexof_linear_uu() { -+ return _string_indexof_linear_uu; -+ } ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ // x31 = tmp4 ++ address generate_compare_long_string_same_encoding(bool isLL) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLL ? 
++ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, ++ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); + -+ static address large_byte_array_inflate() { -+ return _large_byte_array_inflate; ++ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used ++ // update cnt2 counter with already loaded 8 bytes ++ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); ++ // update pointers, because of previous read ++ __ add(str1, str1, wordSize); ++ __ add(str2, str2, wordSize); ++ // less than 16 bytes left? ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); ++ __ push_reg(spilled_regs, sp); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); ++ compare_string_16_bytes_same(DIFF, DIFF2); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ bind(TAIL); ++ __ addi(cnt2, cnt2, isLL ? 16 : 8); ++ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); ++ __ blez(cnt2, CHECK_LAST); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); ++ __ bind(CHECK_LAST); ++ if (!isLL) { ++ __ add(cnt2, cnt2, cnt2); // now in bytes ++ } ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ beqz(tmp4, LENGTH_DIFF); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(DIFF2); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp5, tmp5, tmp3); ++ __ srl(cnt1, cnt1, tmp3); ++ if (isLL) { ++ __ andi(tmp5, tmp5, 0xFF); ++ __ andi(cnt1, cnt1, 0xFF); ++ } else { ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ andi(cnt1, cnt1, 0xFFFF); ++ } ++ __ sub(result, tmp5, cnt1); ++ __ j(LENGTH_DIFF); ++ __ bind(DIFF); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp1, tmp1, tmp3); ++ __ srl(tmp2, tmp2, tmp3); ++ if (isLL) { ++ __ andi(tmp1, tmp1, 0xFF); ++ __ andi(tmp2, tmp2, 0xFF); ++ } else { ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp2, tmp2, 0xFFFF); ++ } ++ __ sub(result, tmp1, tmp2); ++ __ j(LENGTH_DIFF); ++ __ bind(LAST_CHECK_AND_LENGTH_DIFF); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ bind(LENGTH_DIFF); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } + -+ static bool complete() { -+ return _completed; ++ void generate_compare_long_strings() { ++ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); ++ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); + } + -+ static void set_completed() { -+ _completed = true; -+ } -+}; ++ // x10 result ++ // x11 src ++ // x12 src count ++ // x13 pattern ++ // x14 pattern count ++ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) ++ { ++ const char* stubName = needle_isL ++ ? (haystack_isL ? 
"indexof_linear_ll" : "indexof_linear_ul") ++ : "indexof_linear_uu"; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stubName); ++ address entry = __ pc(); + -+#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -new file mode 100644 -index 000000000..f5e212204 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1841 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ bool isL = needle_isL && haystack_isL; ++ // parameters ++ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; ++ // temporary registers ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; ++ // redefinitions ++ Register ch1 = x28, ch2 = x29; ++ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "classfile/javaClasses.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/bytecodeHistogram.hpp" -+#include "interpreter/bytecodeTracer.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "interpreter/templateInterpreterGenerator.hpp" -+#include "interpreter/templateTable.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/synchronizer.hpp" -+#include "runtime/timer.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/debug.hpp" -+#include "utilities/macros.hpp" -+#include ++ __ push_reg(spilled_regs, sp); + -+#ifndef PRODUCT -+#include "oops/method.hpp" -+#endif // !PRODUCT ++ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, ++ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, ++ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, ++ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, ++ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, ++ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; + -+// Size of interpreter code. Increase if too small. Interpreter will -+// fail with a guarantee ("not enough space for interpreter generation"); -+// if too small. -+// Run with +PrintInterpreter to get the VM to print out the size. -+// Max size with JVMTI -+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; ++ __ ld(ch1, Address(needle)); ++ __ ld(ch2, Address(haystack)); ++ // src.length - pattern.length ++ __ sub(haystack_len, haystack_len, needle_len); + -+#define __ _masm-> ++ // first is needle[0] ++ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ mv(mask1, haystack_isL ? mask0101 : mask0001); ++ __ mul(first, first, mask1); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); ++ if (needle_isL != haystack_isL) { ++ __ mv(tmp, ch1); ++ } ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); ++ __ blez(haystack_len, L_SMALL); + -+//----------------------------------------------------------------------------- ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); ++ } ++ // xorr, sub, orr, notr, andr ++ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] ++ // eg: ++ // first: aa aa aa aa aa aa aa aa ++ // ch2: aa aa li nx jd ka aa aa ++ // match_mask: 80 80 00 00 00 00 80 80 ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); + -+address TemplateInterpreterGenerator::generate_slow_signature_handler() { -+ address entry = __ pc(); ++ // search first char of needle, if success, goto L_HAS_ZERO; ++ __ bnez(match_mask, L_HAS_ZERO); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ bltz(haystack_len, L_POST_LOOP); + -+ __ andi(esp, esp, -16); -+ __ mv(c_rarg3, esp); -+ // xmethod -+ // xlocals -+ // c_rarg3: first stack arg - wordSize -+ // adjust sp ++ __ bind(L_LOOP); ++ __ ld(ch2, Address(haystack)); ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ __ bnez(match_mask, L_HAS_ZERO); + -+ __ addi(sp, c_rarg3, -18 * wordSize); -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, 0)); ++ __ bind(L_LOOP_PROCEED); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ bgez(haystack_len, L_LOOP); + -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::slow_signature_handler), -+ xmethod, xlocals, c_rarg3); ++ __ bind(L_POST_LOOP); ++ __ mv(ch2, -wordSize / haystack_chr_size); ++ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check ++ __ ld(ch2, Address(haystack)); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zeros, -1); // all bits set ++ __ j(L_SMALL_PROCEED); + -+ // x10: result handler ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); ++ } ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zeros, -1); // all bits set + -+ // Stack layout: -+ // sp: return address <- sp -+ // 1 garbage -+ // 8 integer args (if static first is unused) -+ // 1 float/double identifiers -+ // 8 double args -+ // stack args <- esp -+ // garbage -+ // expression stack bottom -+ // bcp (NULL) -+ // ... ++ __ bind(L_SMALL_PROCEED); ++ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. 
++ __ notr(ch2, ch2); ++ __ andr(match_mask, match_mask, ch2); ++ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check ++ __ beqz(match_mask, NOMATCH); + -+ // Restore RA -+ __ ld(ra, Address(sp, 0)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ bind(L_SMALL_HAS_ZERO_LOOP); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ mv(ch2, wordSize / haystack_chr_size); ++ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + -+ // Do FP first so we can use c_rarg3 as temp -+ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers ++ __ bind(L_SMALL_CMP_LOOP); ++ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); ++ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ beq(first, ch2, L_SMALL_CMP_LOOP); + -+ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { -+ const FloatRegister r = g_FPArgReg[i]; -+ Label d, done; ++ __ bind(L_SMALL_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, NOMATCH); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ add(result, result, 1); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_SMALL_HAS_ZERO_LOOP); + -+ __ andi(t0, c_rarg3, 1UL << i); -+ __ bnez(t0, d); -+ __ flw(r, Address(sp, (10 + i) * wordSize)); -+ __ j(done); -+ __ bind(d); -+ __ fld(r, Address(sp, (10 + i) * wordSize)); -+ __ bind(done); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); ++ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ // c_rarg0 contains the result from the call of -+ // InterpreterRuntime::slow_signature_handler so we don't touch it -+ // here. It will be loaded with the JNIEnv* later. -+ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { -+ const Register rm = g_INTArgReg[i]; -+ __ ld(rm, Address(sp, i * wordSize)); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ __ addi(sp, sp, 18 * wordSize); -+ __ ret(); ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); ++ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); ++ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) ++ __ sub(result, result, 1); // array index from 0, so result -= 1 + -+ return entry; -+} ++ __ bind(L_HAS_ZERO_LOOP); ++ __ mv(needle_len, wordSize / haystack_chr_size); ++ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); ++ // load next 8 bytes from haystack, and increase result index ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + -+// Various method entries -+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { -+ // xmethod: Method* -+ // x30: sender sp -+ // esp: args ++ // compare one char ++ __ bind(L_CMP_LOOP); ++ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); ++ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zeros, trailing_zeros, 1); // next char index ++ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); ++ __ beq(needle_len, ch2, L_CMP_LOOP); + -+ if (!InlineIntrinsics) { -+ return NULL; // Generate a vanilla entry -+ } ++ __ bind(L_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_HAS_ZERO_LOOP); + -+ // These don't need a safepoint check because they aren't virtually -+ // callable. We won't enter these intrinsics from compiled code. -+ // If in the future we added an intrinsic which was virtually callable -+ // we'd have to worry about how to safepoint so that this code is used. 
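As an aside for readers tracing the indexof stub: compute_match_mask is the standard SWAR "broadcast the first character, then find the zero byte" trick sketched in the aa/80 example comment above. A standalone scalar equivalent for the Latin-1 lane width (illustrative only, not part of the patch; the helper name is made up):

#include <cstdint>

// Flags every byte lane of 'chunk' that equals 'ch' with 0x80, using the same
// mask1/mask2 constants as the stub's Latin-1 path. The lowest set 0x80 is
// always a real match; lanes above it can be spurious, which is harmless here
// because the stub re-checks each candidate position against the needle anyway.
static inline uint64_t match_mask_latin1(uint64_t chunk, uint8_t ch) {
  const uint64_t mask1 = 0x0101010101010101ULL;
  const uint64_t mask2 = 0x7f7f7f7f7f7f7f7fULL;
  uint64_t first = (uint64_t)ch * mask1;   // broadcast ch into all 8 lanes
  uint64_t x = chunk ^ first;              // a lane becomes zero iff it matched
  return (x - mask1) & ~(x | mask2);       // the xorr/sub/orr/notr/andr sequence
}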
++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP); ++ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ // mathematical functions inlined by compiler -+ // (interpreter must provide identical implementation -+ // in order to avoid monotonicity bugs when switching -+ // from interpreter to compiler in the middle of some -+ // computation) -+ // -+ // stack: -+ // [ arg ] <-- esp -+ // [ arg ] -+ // retaddr in ra ++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ address fn = NULL; -+ address entry_point = NULL; -+ Register continuation = ra; -+ switch (kind) { -+ case Interpreter::java_lang_math_abs: -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ fabs_d(f10, f10); -+ __ mv(sp, x30); // Restore caller's SP -+ break; -+ case Interpreter::java_lang_math_sqrt: -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ fsqrt_d(f10, f10); -+ __ mv(sp, x30); -+ break; -+ case Interpreter::java_lang_math_sin : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dsin() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_cos : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dcos() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_tan : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dtan() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_log : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dlog() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_log10 : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dlog10() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_exp : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dexp() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; 
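In the same spirit, the DIFF / CALCULATE_DIFFERENCE blocks of the compare_long_string stubs generated earlier reduce "first differing character" to a count-trailing-zeros over the XOR of two 8-byte chunks. A scalar sketch of that reduction (illustrative only; the explicit alignment below mirrors what the stub's ctzc_bit count is assumed to provide, and __builtin_ctzll is a GCC/Clang builtin):

#include <cstdint>

// Given two 8-byte chunks known to differ, return the signed difference of the
// first differing element, 1-byte (Latin-1) or 2-byte (UTF-16) wide, exactly as
// the srl/andi/sub tail of the stub does.
static int first_difference(uint64_t a, uint64_t b, bool latin1) {
  uint64_t x = a ^ b;                                     // nonzero by precondition
  int shift = __builtin_ctzll(x) & (latin1 ? ~7 : ~15);   // align to the element boundary
  uint64_t mask = latin1 ? 0xFF : 0xFFFF;
  return (int)((a >> shift) & mask) - (int)((b >> shift) & mask);
}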
-+ case Interpreter::java_lang_math_pow : -+ entry_point = __ pc(); -+ __ mv(x9, ra); -+ continuation = x9; -+ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ fld(f11, Address(esp)); -+ __ mv(sp, x30); -+ if (StubRoutines::dpow() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_fmaD : -+ if (UseFMA) { -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); -+ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ fld(f12, Address(esp)); -+ __ fmadd_d(f10, f10, f11, f12); -+ __ mv(sp, x30); // Restore caller's SP -+ } -+ break; -+ case Interpreter::java_lang_math_fmaF : -+ if (UseFMA) { -+ entry_point = __ pc(); -+ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ flw(f11, Address(esp, Interpreter::stackElementSize)); -+ __ flw(f12, Address(esp)); -+ __ fmadd_s(f10, f10, f11, f12); -+ __ mv(sp, x30); // Restore caller's SP -+ } -+ break; -+ default: -+ ; ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO_LOOP_NOMATCH); ++ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until ++ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, ++ // so, result was increased at max by wordSize/str2_chr_size - 1, so, ++ // respective high bit wasn't changed. L_LOOP_PROCEED will increase ++ // result by analyzed characters value, so, we can just reset lower bits ++ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL ++ // 2) restore needle_len and haystack_len values from "compressed" haystack_len ++ // 3) advance haystack value to represent next haystack octet. result & 7/3 is ++ // index of last analyzed substring inside current octet. So, haystack in at ++ // respective start address. We need to advance it to next octet ++ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); ++ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); ++ __ andi(result, result, haystack_isL ? -8 : -4); ++ __ slli(tmp, match_mask, haystack_chr_shift); ++ __ sub(haystack, haystack, tmp); ++ __ addw(haystack_len, haystack_len, zr); ++ __ j(L_LOOP_PROCEED); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(NOMATCH); ++ __ mv(result, -1); ++ ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } -+ if (entry_point != NULL) { -+ __ jr(continuation); ++ ++ void generate_string_indexof_stubs() ++ { ++ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); ++ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); ++ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + -+ return entry_point; -+} ++#ifdef COMPILER2 ++ address generate_mulAdd() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); + -+// Abstract method entry -+// Attempt to execute abstract method. 
Throw exception -+address TemplateInterpreterGenerator::generate_abstract_entry(void) { -+ // xmethod: Method* -+ // x30: sender SP ++ address entry = __ pc(); + -+ address entry_point = __ pc(); ++ const Register out = x10; ++ const Register in = x11; ++ const Register offset = x12; ++ const Register len = x13; ++ const Register k = x14; ++ const Register tmp = x28; + -+ // abstract method entry ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ mul_add(out, in, offset, len, k, tmp); ++ __ leave(); ++ __ ret(); + -+ // pop return address, reset last_sp to NULL -+ __ empty_expression_stack(); -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ return entry; ++ } + -+ // throw exception -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), -+ xmethod); -+ // the call_VM checks for exception, so we should never return here. -+ __ should_not_reach_here(); ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - x address ++ * c_rarg1 - x length ++ * c_rarg2 - y address ++ * c_rarg3 - y length ++ * c_rarg4 - z address ++ * c_rarg5 - z length ++ */ ++ address generate_multiplyToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); ++ address entry = __ pc(); + -+ return entry_point; -+} ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register y = x12; ++ const Register ylen = x13; ++ const Register z = x14; ++ const Register zlen = x15; + -+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { -+ address entry = __ pc(); ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ __ mv(t1, sp); -+ // maximal sp for current fp (stack grows negative) -+ // check if frame is complete -+ __ bge(t0, t1, L); -+ __ stop ("interpreter frame not set up"); -+ __ bind(L); -+ } -+#endif // ASSERT -+ // Restore bcp under the assumption that the current frame is still -+ // interpreted -+ __ restore_bcp(); ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // throw exception -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); -+ return entry; -+} ++ return entry; ++ } + -+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { -+ address entry = __ pc(); -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // setup parameters ++ address generate_squareToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "squareToLen"); ++ address entry = __ pc(); + -+ // convention: expect aberrant index in register x11 -+ __ zero_extend(c_rarg2, x11, 32); -+ // convention: expect array in register x13 -+ __ mv(c_rarg1, x13); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, 
-+ InterpreterRuntime:: -+ throw_ArrayIndexOutOfBoundsException), -+ c_rarg1, c_rarg2); -+ return entry; -+} ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register z = x12; ++ const Register zlen = x13; ++ const Register y = x14; // == x ++ const Register ylen = x15; // == xlen + -+address TemplateInterpreterGenerator::generate_ClassCastException_handler() { -+ address entry = __ pc(); ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; + -+ // object is at TOS -+ __ pop_reg(c_rarg1); ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ mv(y, x); ++ __ mv(ylen, xlen); ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ leave(); ++ __ ret(); + -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); ++ return entry; ++ } + -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ throw_ClassCastException), -+ c_rarg1); -+ return entry; -+} ++ // Arguments: ++ // ++ // Input: ++ // c_rarg0 - newArr address ++ // c_rarg1 - oldArr address ++ // c_rarg2 - newIdx ++ // c_rarg3 - shiftCount ++ // c_rarg4 - numIter ++ // ++ address generate_bigIntegerLeftShift() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); ++ address entry = __ pc(); + -+address TemplateInterpreterGenerator::generate_exception_handler_common( -+ const char* name, const char* message, bool pass_oop) { -+ assert(!pass_oop || message == NULL, "either oop or message but not both"); -+ address entry = __ pc(); -+ if (pass_oop) { -+ // object is at TOS -+ __ pop_reg(c_rarg2); -+ } -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // setup parameters -+ __ la(c_rarg1, Address((address)name)); -+ if (pass_oop) { -+ __ call_VM(x10, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ create_klass_exception), -+ c_rarg1, c_rarg2); -+ } else { -+ // kind of lame ExternalAddress can't take NULL because -+ // external_word_Relocation will assert. 
-+ if (message != NULL) { -+ __ la(c_rarg2, Address((address)message)); -+ } else { -+ __ mv(c_rarg2, NULL_WORD); -+ } -+ __ call_VM(x10, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), -+ c_rarg1, c_rarg2); -+ } -+ // throw exception -+ __ j(address(Interpreter::throw_exception_entry())); -+ return entry; -+} ++ Label loop, exit; + -+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { -+ address entry = __ pc(); ++ Register newArr = c_rarg0; ++ Register oldArr = c_rarg1; ++ Register newIdx = c_rarg2; ++ Register shiftCount = c_rarg3; ++ Register numIter = c_rarg4; + -+ // Restore stack bottom in case i2c adjusted stack -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // and NULL it as marker that esp is now tos until next java call -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); ++ Register shiftRevCount = c_rarg5; ++ Register oldArrNext = t1; + -+ if (state == atos) { -+ Register obj = x10; -+ Register mdp = x11; -+ Register tmp = x12; -+ __ ld(mdp, Address(xmethod, Method::method_data_offset())); -+ __ profile_return_type(mdp, obj, tmp); -+ } ++ __ beqz(numIter, exit); ++ __ shadd(newArr, newIdx, newArr, t0, 2); + -+ // Pop N words from the stack -+ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); -+ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); -+ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); ++ __ li(shiftRevCount, 32); ++ __ sub(shiftRevCount, shiftRevCount, shiftCount); + -+ __ shadd(esp, x11, esp, t0, 3); ++ __ bind(loop); ++ __ addi(oldArrNext, oldArr, 4); ++ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); ++ __ vle32_v(v0, oldArr); ++ __ vle32_v(v4, oldArrNext); ++ __ vsll_vx(v0, v0, shiftCount); ++ __ vsrl_vx(v4, v4, shiftRevCount); ++ __ vor_vv(v0, v0, v4); ++ __ vse32_v(v0, newArr); ++ __ sub(numIter, numIter, t0); ++ __ shadd(oldArr, t0, oldArr, t1, 2); ++ __ shadd(newArr, t0, newArr, t1, 2); ++ __ bnez(numIter, loop); + -+ // Restore machine SP -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ ld(t1, -+ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ __ bind(exit); ++ __ ret(); + -+ __ check_and_handle_popframe(xthread); -+ __ check_and_handle_earlyret(xthread); ++ return entry; ++ } + -+ __ get_dispatch(); -+ __ dispatch_next(state, step); ++ // Arguments: ++ // ++ // Input: ++ // c_rarg0 - newArr address ++ // c_rarg1 - oldArr address ++ // c_rarg2 - newIdx ++ // c_rarg3 - shiftCount ++ // c_rarg4 - numIter ++ // ++ address generate_bigIntegerRightShift() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); ++ address entry = __ pc(); + -+ return entry; -+} ++ Label loop, exit; + -+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, -+ int step, -+ address continuation) { -+ address entry = __ pc(); -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); -+ __ get_dispatch(); ++ Register newArr = c_rarg0; ++ Register oldArr = c_rarg1; ++ Register newIdx = c_rarg2; ++ Register 
shiftCount = c_rarg3; ++ Register numIter = c_rarg4; ++ Register idx = numIter; + -+ // Calculate stack limit -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ Register shiftRevCount = c_rarg5; ++ Register oldArrNext = c_rarg6; ++ Register newArrCur = t0; ++ Register oldArrCur = t1; + -+ // Restore expression stack pointer -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // NULL last_sp until next java call -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ beqz(idx, exit); ++ __ shadd(newArr, newIdx, newArr, t0, 2); + -+ // handle exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); -+ __ should_not_reach_here(); -+ __ bind(L); -+ } ++ __ li(shiftRevCount, 32); ++ __ sub(shiftRevCount, shiftRevCount, shiftCount); + -+ if (continuation == NULL) { -+ __ dispatch_next(state, step); -+ } else { -+ __ jump_to_entry(continuation); -+ } -+ return entry; -+} ++ __ bind(loop); ++ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); ++ __ sub(idx, idx, t0); ++ __ shadd(oldArrNext, idx, oldArr, t1, 2); ++ __ shadd(newArrCur, idx, newArr, t1, 2); ++ __ addi(oldArrCur, oldArrNext, 4); ++ __ vle32_v(v0, oldArrCur); ++ __ vle32_v(v4, oldArrNext); ++ __ vsrl_vx(v0, v0, shiftCount); ++ __ vsll_vx(v4, v4, shiftRevCount); ++ __ vor_vv(v0, v0, v4); ++ __ vse32_v(v0, newArrCur); ++ __ bnez(idx, loop); + -+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { -+ address entry = __ pc(); -+ if (type == T_OBJECT) { -+ // retrieve result from frame -+ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // and verify it -+ __ verify_oop(x10); -+ } else { -+ __ cast_primitive_type(type, x10); ++ __ bind(exit); ++ __ ret(); ++ ++ return entry; + } ++#endif + -+ __ ret(); // return from result handler -+ return entry; -+} ++#ifdef COMPILER2 ++ class MontgomeryMultiplyGenerator : public MacroAssembler { + -+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, -+ address runtime_entry) { -+ assert_cond(runtime_entry != NULL); -+ address entry = __ pc(); -+ __ push(state); -+ __ call_VM(noreg, runtime_entry); -+ __ membar(MacroAssembler::AnyAny); -+ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); -+ return entry; -+} ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, ++ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; + -+// Helpers for commoning out cases in the various type of method entries. 
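For reference, the RVV loops in the two BigInteger shift workers above compute the same limb combination as this scalar form (illustrative only; the Java worker these intrinsify reads one limb past numIter, so the extra oldArr element is assumed readable, and 0 < shift < 32):

#include <cstdint>
#include <cstddef>

// Scalar equivalent of bigIntegerLeftShiftWorker's vector loop: each 32-bit
// limb is merged with its right neighbour so the magnitude shifts left by
// 'shift' bits. The right-shift worker is the mirror image, walking downward.
static void big_integer_left_shift(uint32_t* newArr, const uint32_t* oldArr,
                                   size_t newIdx, unsigned shift, size_t numIter) {
  unsigned shiftRev = 32 - shift;
  for (size_t i = 0; i < numIter; i++) {
    newArr[newIdx + i] = (oldArr[i] << shift) | (oldArr[i + 1] >> shiftRev);
  }
}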
-+// ++ RegSet _toSave; ++ bool _squaring; + ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { + -+// increment invocation count & check for overflow -+// -+// Note: checking for negative value instead of overflow -+// so we have a 'sticky' overflow test -+// -+// xmethod: method -+// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { -+ Label done; -+ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -+ if (TieredCompilation) { -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+ } else { // not TieredCompilation -+ const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); ++ // Register allocation + -+ __ get_method_counters(xmethod, t1, done); ++ Register reg = c_rarg0; ++ Pa_base = reg; // Argument registers ++ if (squaring) { ++ Pb_base = Pa_base; ++ } else { ++ Pb_base = ++reg; ++ } ++ Pn_base = ++reg; ++ Rlen= ++reg; ++ inv = ++reg; ++ Pm_base = ++reg; + -+ if (ProfileInterpreter) { // %%% Merge this into MethodData* -+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ __ addw(x11, x11, 1); -+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ } -+ // Update standard invocation counters -+ __ lwu(x11, invocation_counter); -+ __ lwu(x10, backedge_counter); ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; + -+ __ addw(x11, x11, InvocationCounter::count_increment); -+ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. ++ Pb = ++reg; ++ Pm = ++reg; ++ Pn = ++reg; + -+ __ sw(x11, invocation_counter); -+ __ addw(x10, x10, x11); // add both counters ++ tmp0 = ++reg; // Three registers which form a ++ tmp1 = ++reg; // triple-precision accumuator. ++ tmp2 = ++reg; + -+ // profile_method is non-null only for interpreted method so -+ // profile_method != NULL == !native_call ++ Ri = x6; // Inner and outer loop indexes. 
++ Rj = x7; + -+ if (ProfileInterpreter && profile_method != NULL) { -+ // Test to see if we should create a method data oop -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t1, *profile_method_continue); ++ Rhi_ab = x28; // Product registers: low and high parts ++ Rlo_ab = x29; // of a*b and m*n. ++ Rhi_mn = x30; ++ Rlo_mn = x31; + -+ // if no method data exists, go to profile_method -+ __ test_method_data_pointer(t1, *profile_method); ++ // x18 and up are callee-saved. ++ _toSave = RegSet::range(x18, reg) + Pm_base; + } + -+ { -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); -+ __ bltu(x10, t1, done); -+ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here ++ private: ++ void save_regs() { ++ push_reg(_toSave, sp); + } -+ __ bind(done); -+ } -+} + -+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { -+ __ mv(c_rarg1, zr); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); -+ __ j(do_continue); -+} ++ void restore_regs() { ++ pop_reg(_toSave, sp); ++ } + -+// See if we've got enough room on the stack for locals plus overhead -+// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError -+// without going through the signal handler, i.e., reserved and yellow zones -+// will not be made usable. The shadow zone must suffice to handle the -+// overflow. -+// The expression stack grows down incrementally, so the normal guard -+// page mechanism will work for that. -+// -+// NOTE: Since the additional locals are also always pushed (wasn't -+// obvious in generate_method_entry) so the guard should work for them -+// too. -+// -+// Args: -+// x13: number of additional locals this frame needs (what we must check) -+// xmethod: Method* -+// -+// Kills: -+// x10 -+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ template ++ void unroll_2(Register count, T block) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(t0, count, 0x1); ++ bnez(t0, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); ++ } + -+ // monitor entry size: see picture of stack set -+ // (generate_method_entry) and frame_amd64.hpp -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(tmp, count, 0x1); ++ bnez(tmp, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); ++ } + -+ // total overhead size: entry_size + (saved fp through expr stack -+ // bottom). 
be sure to change this if you add/subtract anything -+ // to/from the overhead area -+ const int overhead_size = -+ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; ++ void pre1(RegisterOrConstant i) { ++ block_comment("pre1"); ++ // Pa = Pa_base; ++ // Pb = Pb_base + i; ++ // Pm = Pm_base; ++ // Pn = Pn_base + i; ++ // Ra = *Pa; ++ // Rb = *Pb; ++ // Rm = *Pm; ++ // Rn = *Pn; ++ if (i.is_register()) { ++ slli(t0, i.as_register(), LogBytesPerWord); ++ } else { ++ mv(t0, i.as_constant()); ++ slli(t0, t0, LogBytesPerWord); ++ } + -+ const int page_size = os::vm_page_size(); ++ mv(Pa, Pa_base); ++ add(Pb, Pb_base, t0); ++ mv(Pm, Pm_base); ++ add(Pn, Pn_base, t0); + -+ Label after_frame_check; ++ ld(Ra, Address(Pa)); ++ ld(Rb, Address(Pb)); ++ ld(Rm, Address(Pm)); ++ ld(Rn, Address(Pn)); + -+ // see if the frame is greater than one page in size. If so, -+ // then we need to verify there is enough stack space remaining -+ // for the additional locals. -+ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); -+ __ bleu(x13, t0, after_frame_check); ++ // Zero the m*n result. ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); ++ } + -+ // compute sp as if this were going to be the last frame on -+ // the stack before the red zone ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ addi(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ addi(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the ++ // previous iteration. ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } + -+ // locals + overhead, in bytes -+ __ mv(x10, overhead_size); -+ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. ++ void post1() { ++ block_comment("post1"); + -+ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); -+ __ ld(t0, stack_limit); ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + -+#ifdef ASSERT -+ Label limit_okay; -+ // Verify that thread stack limit is non-zero. -+ __ bnez(t0, limit_okay); -+ __ stop("stack overflow limit is zero"); -+ __ bind(limit_okay); -+#endif ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); + -+ // Add stack limit to locals. -+ __ add(x10, x10, t0); ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); + -+ // Check against the current stack bottom. 
-+ __ bgtu(sp, x10, after_frame_check); ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); ++ stop("broken Montgomery multiply"); ++ bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and an cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp0, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ // Remove the incoming args, peeling the machine SP back to where it -+ // was in the caller. This is not strictly necessary, but unless we -+ // do so the stack frame may have a garbage FP; this ensures a -+ // correct call stack that we can always unwind. The ANDI should be -+ // unnecessary because the sender SP in x30 is always aligned, but -+ // it doesn't hurt. -+ __ andi(sp, x30, -16); ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ // Pa = Pa_base + i-len; ++ // Pb = Pb_base + len; ++ // Pm = Pm_base + i-len; ++ // Pn = Pn_base + len; + -+ // Note: the restored frame is not necessarily interpreted. -+ // Use the shared runtime version of the StackOverflowError. -+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); -+ -+ // all done with frame size check -+ __ bind(after_frame_check); -+} -+ -+// Allocate monitor and lock method (asm interpreter) -+// -+// Args: -+// xmethod: Method* -+// xlocals: locals -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) -+// t0, t1 (temporary regs) -+void TemplateInterpreterGenerator::lock_method() { -+ // synchronize method -+ const Address access_flags(xmethod, Method::access_flags_offset()); -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ sub(Rj, i, len); ++ // Rj == i-len + -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); -+#endif // ASSERT ++ // Ra as temp register ++ slli(Ra, Rj, LogBytesPerWord); ++ add(Pa, Pa_base, Ra); ++ add(Pm, Pm_base, Ra); ++ slli(Ra, len, LogBytesPerWord); ++ add(Pb, Pb_base, Ra); ++ add(Pn, Pn_base, Ra); + -+ // get synchronization object -+ { -+ Label done; -+ __ lwu(x10, access_flags); -+ __ andi(t0, x10, JVM_ACC_STATIC); -+ // get receiver (assume this is frequent case) -+ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); -+ __ beqz(t0, done); -+ __ load_mirror(x10, xmethod); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ add(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ add(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ add(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ add(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); + -+#ifdef ASSERT -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("synchronization object is NULL"); -+ __ bind(L); ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); + } -+#endif // ASSERT + -+ __ bind(done); -+ } 
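A compiler-level view of the MACC step that the generator pipelines above: one 64x64->128 product folded into the triple-word accumulator kept in tmp0/tmp1/tmp2. Illustrative only; unsigned __int128 is a GCC/Clang extension, and the stub gets the same effect from mul/mulhu plus the cad/cadc/adc carry helpers:

#include <cstdint>

// acc(t2:t1:t0) += a * b, carried out the same way the mulhu/mul + acc() pair does it.
static inline void macc(uint64_t a, uint64_t b,
                        uint64_t& t0, uint64_t& t1, uint64_t& t2) {
  unsigned __int128 p  = (unsigned __int128)a * b;            // full 128-bit product
  unsigned __int128 lo = (unsigned __int128)t0 + (uint64_t)p; // low word, keep the carry
  t0 = (uint64_t)lo;
  unsigned __int128 hi = (unsigned __int128)t1 + (uint64_t)(p >> 64) + (uint64_t)(lo >> 64);
  t1 = (uint64_t)hi;
  t2 += (uint64_t)(hi >> 64);                                 // final carry into the top word
}

post1() then chooses m = tmp0 * inv mod 2^64, which makes tmp0 + m*n[0] a multiple of 2^64; that is why, as the comment in the generated code notes, only the high half of that product ever needs to be added back.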
++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ sub(Rj, i, len); + -+ // add space for monitor & lock -+ __ add(sp, sp, - entry_size); // add space for a monitor entry -+ __ add(esp, esp, - entry_size); -+ __ mv(t0, esp); -+ __ sd(t0, monitor_block_top); // set new monitor block top -+ // store object -+ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes())); -+ __ mv(c_rarg1, esp); // object address -+ __ lock_object(c_rarg1); -+} ++ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part + -+// Generate a fixed interpreter frame. This is identical setup for -+// interpreted methods and for native methods hence the shared code. -+// -+// Args: -+// ra: return address -+// xmethod: Method* -+// xlocals: pointer to locals -+// xcpool: cp cache -+// stack_pointer: previous sp -+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { -+ // initialize fixed part of activation frame -+ if (native_call) { -+ __ add(esp, sp, - 14 * wordSize); -+ __ mv(xbcp, zr); -+ __ add(sp, sp, - 14 * wordSize); -+ // add 2 zero-initialized slots for native calls -+ __ sd(zr, Address(sp, 13 * wordSize)); -+ __ sd(zr, Address(sp, 12 * wordSize)); -+ } else { -+ __ add(esp, sp, - 12 * wordSize); -+ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod -+ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase -+ __ add(sp, sp, - 12 * wordSize); -+ } -+ __ sd(xbcp, Address(sp, wordSize)); -+ __ sd(esp, Address(sp, 0)); ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = tmp0; ++ // Rj as temp register ++ slli(Rj, Rj, LogBytesPerWord); ++ add(Rj, Pm_base, Rj); ++ sd(tmp0, Address(Rj)); + -+ if (ProfileInterpreter) { -+ Label method_data_continue; -+ __ ld(t0, Address(xmethod, Method::method_data_offset())); -+ __ beqz(t0, method_data_continue); -+ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); -+ __ bind(method_data_continue); -+ } ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ __ sd(xmethod, Address(sp, 7 * wordSize)); -+ __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); ++ // A carry in tmp0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. ++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (tmp0) ++ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); ++ Label loop, post, again; ++ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now ++ beqz(tmp0, post); { ++ bind(again); { ++ mv(i, zr); ++ mv(cnt, len); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ li(t0, 1); // set carry flag, i.e. 
no borrow ++ align(16); ++ bind(loop); { ++ notr(Rn, Rn); ++ add(Rm, Rm, t0); ++ add(Rm, Rm, Rn); ++ sltu(t0, Rm, Rn); ++ slli(Rn, i, LogBytesPerWord); // Rn as temp register ++ add(Rn, Pm_base, Rn); ++ sd(Rm, Address(Rn)); ++ add(i, i, 1); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ sub(cnt, cnt, 1); ++ } bnez(cnt, loop); ++ addi(tmp0, tmp0, -1); ++ add(tmp0, tmp0, t0); ++ } bnez(tmp0, again); ++ } bind(post); ++ } + -+ // Get mirror and store it in the frame as GC root for this Method* -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC) { -+ __ load_mirror(x28, xmethod); -+ __ sd(x28, Address(sp, 4 * wordSize)); -+ } else -+#endif -+ { -+ __ load_mirror(t0, xmethod); -+ __ sd(t0, Address(sp, 4 * wordSize)); -+ } -+ __ sd(zr, Address(sp, 5 * wordSize)); ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + -+ __ load_constant_pool_cache(xcpool, xmethod); -+ __ sd(xcpool, Address(sp, 3 * wordSize)); -+ __ sd(xlocals, Address(sp, 2 * wordSize)); ++ slli(tmp1, len, LogBytesPerWord); ++ add(s, s, tmp1); ++ mv(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli(tmp1, len, LogBytesPerWord); ++ sub(s, d, tmp1); ++ } ++ // [63...0] -> [31...0][63...32] ++ void reverse1(Register d, Register s, Register tmp) { ++ addi(s, s, -wordSize); ++ ld(tmp, Address(s)); ++ ror_imm(tmp, tmp, 32, t0); ++ sd(tmp, Address(d)); ++ addi(d, d, wordSize); ++ } + -+ __ sd(ra, Address(sp, 11 * wordSize)); -+ __ sd(fp, Address(sp, 10 * wordSize)); -+ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp ++ void step_squaring() { ++ // An extra ACC ++ step(); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } + -+ // set sender sp -+ // leave last_sp as null -+ __ sd(x30, Address(sp, 9 * wordSize)); -+ __ sd(zr, Address(sp, 8 * wordSize)); ++ void last_squaring(Register i) { ++ Label dont; ++ // if ((i & 1) == 0) { ++ andi(t0, i, 0x1); ++ bnez(t0, dont); { ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } bind(dont); ++ } + -+ // Move SP out of the way -+ if (!native_call) { -+ __ load_max_stack(t0, xmethod); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ slli(t0, t0, 3); -+ __ sub(t0, sp, t0); -+ __ andi(sp, t0, -16); -+ } -+} ++ void extra_step_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + -+// End of helpers ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ } + -+// Various method entries -+//------------------------------------------------------------------------------------------------------------------------ -+// -+// ++ void post1_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + -+// Method entry for java.lang.ref.Reference.get. 
-+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { -+ // Code: _aload_0, _getfield, _areturn -+ // parameter size = 1 -+ // -+ // The code that gets generated by this routine is split into 2 parts: -+ // 1. The "intrinsified" code for G1 (or any SATB based GC), -+ // 2. The slow path - which is an expansion of the regular method entry. -+ // -+ // Notes:- -+ // * In the G1 code we do not check whether we need to block for -+ // a safepoint. If G1 is enabled then we must execute the specialized -+ // code for Reference.get (except when the Reference object is null) -+ // so that we can log the value in the referent field with an SATB -+ // update buffer. -+ // If the code for the getfield template is modified so that the -+ // G1 pre-barrier code is executed when the current method is -+ // Reference.get() then going through the normal method entry -+ // will be fine. -+ // * The G1 code can, however, check the receiver object (the instance -+ // of java.lang.Reference) and jump to the slow path if null. If the -+ // Reference object is null then we obviously cannot fetch the referent -+ // and so we don't need to call the G1 pre-barrier. Thus we can use the -+ // regular method entry code to generate the NPE. -+ // -+ // This code is based on generate_accessor_entry. -+ // -+ // xmethod: Method* -+ // x30: senderSP must preserve for slow path, set SP to it on fast path ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); + -+ // RA is live. It must be saved around calls. ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); + -+ address entry = __ pc(); ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and a cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ const int referent_offset = java_lang_ref_Reference::referent_offset; -+ guarantee(referent_offset > 0, "referent offset not initialized"); ++ // use t0 as carry ++ void acc(Register Rhi, Register Rlo, ++ Register tmp0, Register tmp1, Register tmp2) { ++ cad(tmp0, tmp0, Rlo, t0); ++ cadc(tmp1, tmp1, Rhi, t0); ++ adc(tmp2, tmp2, zr, t0); ++ } + -+ Label slow_path; -+ const Register local_0 = c_rarg0; -+ // Check if local 0 != NULL -+ // If the receiver is null then it is OK to jump to the slow path. -+ __ ld(local_0, Address(esp, 0)); -+ __ beqz(local_0, slow_path); ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements b ++ * c_rarg2 - int array elements n (the modulus) ++ * c_rarg3 - int length ++ * c_rarg4 - int inv ++ * c_rarg5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ __ mv(x9, x30); // Move senderSP to a callee-saved register ++ align(CodeEntryAlignment); ++ address entry = pc(); + -+ // Load the value of the referent field. -+ const Address field_address(local_0, referent_offset); -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); ++ beqz(Rlen, nothing); + -+ // areturn -+ __ andi(sp, x9, -16); // done with stack -+ __ ret(); ++ enter(); + -+ // generate a vanilla interpreter entry as the slow path -+ __ bind(slow_path); -+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); -+ return entry; -+} ++ // Make room. ++ li(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); + -+/** -+ * Method entry for static native methods: -+ * int java.util.zip.CRC32.update(int crc, int b) -+ */ -+address TemplateInterpreterGenerator::generate_CRC32_update_entry() { -+ // TODO: Unimplemented generate_CRC32_update_entry -+ return 0; -+} ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 + -+/** -+ * Method entry for static native methods: -+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) -+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) -+ */ -+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { -+ // TODO: Unimplemented generate_CRC32_updateBytes_entry -+ return 0; -+} ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } + -+/** -+ * Method entry for intrinsic-candidate (non-native) methods: -+ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -+ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) -+ * Unlike CRC32, CRC32C does not have any methods marked as native -+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses -+ */ -+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { -+ // TODO: Unimplemented generate_CRC32C_updateBytes_entry -+ return 0; -+} ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); + -+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -+ // Bang each page in the shadow zone. We can't assume it's been done for -+ // an interpreter frame with greater than a page of locals, so each page -+ // needs to be checked. Only true for non-native. 
-+ if (UseStackBanging) { -+ const int n_shadow_pages = checked_cast(JavaThread::stack_shadow_zone_size()) / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; -+ const int page_size = os::vm_page_size(); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t1, sp, pages * page_size); -+ __ sd(zr, Address(t1)); -+ } -+ } -+} ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld(Rn, Address(Pn_base)); ++ mul(Rlo_mn, Rn, inv); ++ li(t0, -1); ++ Label ok; ++ beq(Rlo_mn, t0, ok); ++ stop("broken inverse in Montgomery multiply"); ++ bind(ok); ++ } ++#endif + -+// Interpreter stub for calling a native method. (asm interpreter) -+// This sets up a somewhat different looking stack for calling the -+// native method than the typical interpreter frame setup. -+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { -+ // determine code generation flags -+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ mv(Pm_base, Ra); + -+ // x11: Method* -+ // x30: sender sp ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); + -+ address entry_point = __ pc(); ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bge(Ri, Rlen, end); + -+ const Address constMethod (xmethod, Method::const_offset()); -+ const Address access_flags (xmethod, Method::access_flags_offset()); -+ const Address size_of_parameters(x12, ConstMethod:: -+ size_of_parameters_offset()); ++ bind(loop); ++ pre1(Ri); + -+ // get parameter size (always needed) -+ __ ld(x12, constMethod); -+ __ load_unsigned_short(x12, size_of_parameters); ++ block_comment(" for (j = i; j; j--) {"); { ++ mv(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); + -+ // Native calls don't need the stack size check since they have no -+ // expression stack and the arguments are already on the stack and -+ // we only add a handful of words to the stack. 
++ post1(); ++ addw(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // xmethod: Method* -+ // x12: size of parameters -+ // x30: sender sp ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + -+ // for natives the size of locals is zero ++ bind(loop); ++ pre2(Ri, Rlen); + -+ // compute beginning of parameters (xlocals) -+ __ shadd(xlocals, x12, esp, xlocals, 3); -+ __ addi(xlocals, xlocals, -wordSize); ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ slliw(Rj, Rlen, 1); ++ subw(Rj, Rj, Ri); ++ subw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); + -+ // Pull SP back to minimum size: this avoids holes in the stack -+ __ andi(sp, esp, -16); ++ post2(Ri, Rlen); ++ addw(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); ++ bind(end); ++ } ++ block_comment("} // i"); + -+ // initialize fixed part of activation frame -+ generate_fixed_frame(true); ++ normalize(Rlen); + -+ // make sure method is native & not abstract -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); -+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); -+#endif ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base + -+ // Since at this point in the method invocation the exception -+ // handler would try to exit the monitor of synchronized methods -+ // which hasn't been entered yet, we set the thread local variable -+ // _do_not_unlock_if_synchronized to true. The remove_activation -+ // will check this flag. ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); + -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ __ mv(t1, true); -+ __ sb(t1, do_not_unlock_if_synchronized); ++ leave(); ++ bind(nothing); ++ ret(); + -+ // increment invocation count & check for overflow -+ Label invocation_counter_overflow; -+ if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); -+ } ++ return entry; ++ } + -+ Label continue_after_compile; -+ __ bind(continue_after_compile); ++ /** ++ * ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_square() { ++ Label argh; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ bang_stack_shadow_pages(true); ++ align(CodeEntryAlignment); ++ address entry = pc(); + -+ // reset the _do_not_unlock_if_synchronized flag -+ __ sb(zr, do_not_unlock_if_synchronized); ++ enter(); + -+ // check for synchronized methods -+ // Must happen AFTER invocation_counter check and stack overflow check, -+ // so method is not locked if overflows. -+ if (synchronized) { -+ lock_method(); -+ } else { -+ // no synchronization necessary -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); -+#endif -+ } ++ // Make room. 
++ li(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); + -+ // start execution -+#ifdef ASSERT -+ __ verify_frame_setup(); -+#endif ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 + -+ // jvmti support -+ __ notify_method_entry(); ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } + -+ // work registers -+ const Register t = x18; -+ const Register result_handler = x19; ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); + -+ // allocate space for parameters -+ __ ld(t, Address(xmethod, Method::const_offset())); -+ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); ++ mv(Pm_base, Ra); + -+ __ slli(t, t, Interpreter::logStackElementSize); -+ __ sub(x30, esp, t); -+ __ andi(sp, x30, -16); -+ __ mv(esp, x30); ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); + -+ // get signature handler -+ { -+ Label L; -+ __ ld(t, Address(xmethod, Method::signature_handler_offset())); -+ __ bnez(t, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::prepare_native_call), -+ xmethod); -+ __ ld(t, Address(xmethod, Method::signature_handler_offset())); -+ __ bind(L); -+ } ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bind(loop); ++ bge(Ri, Rlen, end); + -+ // call signature handler -+ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, -+ "adjust this code"); -+ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, -+ "adjust this code"); -+ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, -+ "adjust this code"); ++ pre1(Ri); + -+ // The generated handlers do not touch xmethod (the method). -+ // However, large signatures cannot be cached and are generated -+ // each time here. The slow-path generator can do a GC on return, -+ // so we must reload it after the call. 
-+ __ jalr(t); -+ __ get_method(xmethod); // slow path can do a GC, reload xmethod ++ block_comment("for (j = (i+1)/2; j; j--) {"); { ++ addi(Rj, Ri, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); + ++ last_squaring(Ri); + -+ // result handler is in x10 -+ // set result handler -+ __ mv(result_handler, x10); -+ // pass mirror handle if static call -+ { -+ Label L; -+ __ lwu(t, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t0, t, JVM_ACC_STATIC); -+ __ beqz(t0, L); -+ // get mirror -+ __ load_mirror(t, xmethod); -+ // copy mirror into activation frame -+ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // pass handle to mirror -+ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); -+ __ bind(L); -+ } ++ block_comment(" for (j = i/2; j; j--) {"); { ++ srliw(Rj, Ri, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); + -+ // get native function entry point in x28 -+ { -+ Label L; -+ __ ld(x28, Address(xmethod, Method::native_function_offset())); -+ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); -+ __ mv(t1, unsatisfied); -+ __ ld(t1, t1); -+ __ bne(x28, t1, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::prepare_native_call), -+ xmethod); -+ __ get_method(xmethod); -+ __ ld(x28, Address(xmethod, Method::native_function_offset())); -+ __ bind(L); -+ } ++ post1_squaring(); ++ addi(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); + -+ // pass JNIEnv -+ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // It is enough that the pc() points into the right code -+ // segment. It does not have to be the correct return pc. -+ Label native_return; -+ __ set_last_Java_frame(esp, fp, native_return, x30); ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ bind(loop); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + -+ // change thread state -+#ifdef ASSERT -+ { -+ Label L; -+ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); -+ __ addi(t0, zr, (u1)_thread_in_Java); -+ __ beq(t, t0, L); -+ __ stop("Wrong thread state in native stub"); -+ __ bind(L); -+ } -+#endif ++ pre2(Ri, Rlen); + -+ // Change state to native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ sub(Rj, Rj, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); + -+ // Call the native method. -+ __ jalr(x28); -+ __ bind(native_return); -+ __ get_method(xmethod); -+ // result potentially in x10 or f10 ++ last_squaring(Ri); + -+ // make room for the pushes we're about to do -+ __ sub(t0, esp, 4 * wordSize); -+ __ andi(sp, t0, -16); ++ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); + -+ // NOTE: The order of these pushes is known to frame::interpreter_frame_result -+ // in order to extract the result of a method call. 
If the order of these -+ // pushes change or anything else is added to the stack then the code in -+ // interpreter_frame_result must also change. -+ __ push(dtos); -+ __ push(ltos); ++ post2(Ri, Rlen); ++ addi(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); + -+ // change thread state -+ // Force all preceding writes to be observed prior to thread state change -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ __ mv(t0, _thread_in_native_trans); -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ normalize(Rlen); + -+ if (os::is_MP()) { -+ if (UseMembar) { -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, t0, t1); ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ ++ leave(); ++ ret(); ++ ++ return entry; + } -+ } ++ }; ++#endif // COMPILER2 + -+ // check for safepoint operation in progress and/or pending suspend requests -+ { -+ Label L, Continue; -+ __ safepoint_poll_acquire(L); -+ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ beqz(t1, Continue); -+ __ bind(L); ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. + -+ // Don't use call_VM as it will see a possible pending exception -+ // and forward it and never return here preventing us from -+ // clearing _last_native_pc down below. So we do a runtime call by -+ // hand. -+ // -+ __ mv(c_rarg0, xthread); -+ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); -+ __ jalr(t1); -+ __ get_method(xmethod); -+ __ reinit_heapbase(); -+ __ bind(Continue); -+ } ++#undef __ ++#define __ masm-> + -+ // change thread state -+ // Force all preceding writes to be observed prior to thread state change -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. 
++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; + -+ __ mv(t0, _thread_in_Java); -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ const int insts_size = 512; ++ const int locs_size = 64; + -+ // reset_last_Java_frame -+ __ reset_last_Java_frame(true); ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); -+ } ++ address start = __ pc(); + -+ // reset handle block -+ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM + -+ // If result is an oop unbox and store it in frame where gc will see it -+ // and result handler will pick it up ++ __ enter(); // Save FP and RA before call + -+ { -+ Label no_oop, not_weak, store_result; -+ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); -+ __ bne(t, result_handler, no_oop); -+ // Unbox oop result, e.g. JNIHandles::resolve result. -+ __ pop(ltos); -+ __ resolve_jobject(x10, xthread, t); -+ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // keep stack depth as expected by pushing oop which will eventually be discarded -+ __ push(ltos); -+ __ bind(no_oop); -+ } ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); + -+ { -+ Label no_reguard; -+ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled); -+ __ bne(t0, t1, no_reguard); ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog + -+ __ push_call_clobbered_registers(); ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } + __ mv(c_rarg0, xthread); -+ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ __ jalr(t1); -+ __ pop_call_clobbered_registers(); -+ __ bind(no_reguard); -+ } ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); + -+ // The method register is junk from after the thread_in_native transition -+ // until here. Also can't call_VM until the bcp has been -+ // restored. Need bcp for throwing exception below so get it now. 
-+ __ get_method(xmethod); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); + -+ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> -+ // xbcp == code_base() -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* -+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase -+ // handle exceptions (exception handling will handle unlocking!) -+ { ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT + Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // Note: At some point we may want to unify this with the code -+ // used in call_VM_base(); i.e., we should use the -+ // StubRoutines::forward_exception code. For now this doesn't work -+ // here because the sp is not correctly set at this point. -+ __ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_pending_exception)); ++ __ bnez(t0, L); + __ should_not_reach_here(); + __ bind(L); -+ } -+ -+ // do unlocking if necessary -+ { -+ Label L; -+ __ lwu(t, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); -+ __ beqz(t0, L); -+ // the code below should be shared with interpreter macro -+ // assembler implementation -+ { -+ Label unlock; -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object -+ // has not been unlocked by an explicit monitorexit bytecode. -+ -+ // monitor expect in c_rarg1 for slow unlock path -+ __ la(c_rarg1, Address(fp, // address of first monitor -+ (intptr_t)(frame::interpreter_frame_initial_sp_offset * -+ wordSize - sizeof(BasicObjectLock)))); -+ -+ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ __ bnez(t, unlock); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Entry already unlocked, need to throw exception -+ __ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ __ should_not_reach_here(); + -+ __ bind(unlock); -+ __ unlock_object(c_rarg1); -+ } -+ __ bind(L); ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); + } + -+ // jvmti support -+ // Note: This must happen _after_ handling/throwing any exceptions since -+ // the exception handler code notifies the runtime of method exits -+ // too. If this happens before, method entry/exit notifications are -+ // not properly paired (was bug - gri 11/22/99). -+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); -+ -+ __ pop(ltos); -+ __ pop(dtos); ++ // Initialization ++ void generate_initial() { ++ // Generate initial stubs and initializes the entry points + -+ __ jalr(result_handler); ++ // entry points that exist in all platforms Note: This is code ++ // that could be shared among different platforms - however the ++ // benefit seems to be smaller than the disadvantage of having a ++ // much more complicated generator structure. See also comment in ++ // stubRoutines.hpp. 
+ -+ // remove activation -+ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp -+ // remove frame anchor -+ __ leave(); ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); + -+ // restore sender sp -+ __ mv(sp, esp); ++ StubRoutines::_call_stub_entry = ++ generate_call_stub(StubRoutines::_call_stub_return_address); + -+ __ ret(); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); + -+ if (inc_counter) { -+ // Handle overflow of counter and compile method -+ __ bind(invocation_counter_overflow); -+ generate_counter_overflow(continue_after_compile); ++ // Build this early so it's available for the interpreter. ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_StackOverflowError)); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_delayed_StackOverflowError)); ++ // Safefetch stubs. ++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); + } + -+ return entry_point; -+} ++ void generate_all() { ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++ StubRoutines::_throw_AbstractMethodError_entry = ++ generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_AbstractMethodError)); + -+// -+// Generic interpreted method entry to (asm) interpreter -+// -+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = ++ generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_IncompatibleClassChangeError)); + -+ // determine code generation flags -+ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ StubRoutines::_throw_NullPointerException_at_call_entry = ++ generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_NullPointerException_at_call)); ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); + -+ // t0: sender sp -+ address entry_point = __ pc(); ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } + -+ const Address constMethod(xmethod, Method::const_offset()); -+ const Address access_flags(xmethod, Method::access_flags_offset()); -+ const Address size_of_parameters(x13, -+ ConstMethod::size_of_parameters_offset()); -+ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); ++ if (UseMultiplyToLenIntrinsic) { ++ StubRoutines::_multiplyToLen = generate_multiplyToLen(); ++ } + -+ // get parameter size (always needed) -+ // need to load the const method first -+ __ ld(x13, constMethod); -+ __ load_unsigned_short(x12, size_of_parameters); ++ if (UseSquareToLenIntrinsic) { ++ StubRoutines::_squareToLen = generate_squareToLen(); ++ } + -+ // x12: size of 
parameters ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } + -+ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words -+ __ sub(x13, x13, x12); // x13 = no. of additional locals ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); ++ StubRoutines::_montgomerySquare = g.generate_square(); ++ } + -+ // see if we've got enough room on the stack for locals plus overhead. -+ generate_stack_overflow_check(); ++ if (UseRVVForBigIntegerShiftIntrinsics) { ++ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); ++ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); ++ } ++#endif + -+ // compute beginning of parameters (xlocals) -+ __ shadd(xlocals, x12, esp, t1, 3); -+ __ add(xlocals, xlocals, -wordSize); ++ generate_compare_long_strings(); + -+ // Make room for additional locals -+ __ slli(t1, x13, 3); -+ __ sub(t0, esp, t1); ++ generate_string_indexof_stubs(); + -+ // Padding between locals and fixed part of activation frame to ensure -+ // SP is always 16-byte aligned. -+ __ andi(sp, t0, -16); ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs_nm != NULL) { ++ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); ++ } + -+ // x13 - # of additional locals -+ // allocate space for locals -+ // explicitly initialize locals -+ { -+ Label exit, loop; -+ __ blez(x13, exit); // do nothing if x13 <= 0 -+ __ bind(loop); -+ __ sd(zr, Address(t0)); -+ __ add(t0, t0, wordSize); -+ __ add(x13, x13, -1); // until everything initialized -+ __ bnez(x13, loop); -+ __ bind(exit); ++ StubRoutines::riscv::set_completed(); + } + -+ // And the base dispatch table -+ __ get_dispatch(); -+ -+ // initialize fixed part of activation frame -+ generate_fixed_frame(false); -+ -+ // make sure method is not native & not abstract -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); -+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); -+#endif -+ -+ // Since at this point in the method invocation the exception -+ // handler would try to exit the monitor of synchronized methods -+ // which hasn't been entered yet, we set the thread local variable -+ // _do_not_unlock_if_synchronized to true. The remove_activation -+ // will check this flag. 
-+ -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ __ mv(t1, true); -+ __ sb(t1, do_not_unlock_if_synchronized); ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } + -+ Label no_mdp; -+ const Register mdp = x13; -+ __ ld(mdp, Address(xmethod, Method::method_data_offset())); -+ __ beqz(mdp, no_mdp); -+ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); -+ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers -+ __ bind(no_mdp); ++ ~StubGenerator() {} ++}; // end class declaration + -+ // increment invocation count & check for overflow -+ Label invocation_counter_overflow; -+ Label profile_method; -+ Label profile_method_continue; -+ if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow, -+ &profile_method, -+ &profile_method_continue); -+ if (ProfileInterpreter) { -+ __ bind(profile_method_continue); -+ } ++#define UCM_TABLE_MAX_ENTRIES 8 ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } + -+ Label continue_after_compile; -+ __ bind(continue_after_compile); ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +new file mode 100644 +index 00000000000..395a2d338e4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bang_stack_shadow_pages(false); ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/globalDefinitions.hpp" + -+ // reset the _do_not_unlock_if_synchronized flag -+ __ sb(zr, do_not_unlock_if_synchronized); ++// Implementation of the platform-specific part of StubRoutines - for ++// a description of how to extend it, see the stubRoutines.hpp file. + -+ // check for synchronized methods -+ // Must happen AFTER invocation_counter check and stack overflow check, -+ // so method is not locked if overflows. 
-+ if (synchronized) { -+ // Allocate monitor and lock method -+ lock_method(); -+ } else { -+ // no synchronization necessary -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); -+#endif -+ } ++address StubRoutines::riscv::_get_previous_sp_entry = NULL; + -+ // start execution -+#ifdef ASSERT -+ __ verify_frame_setup(); -+#endif ++address StubRoutines::riscv::_f2i_fixup = NULL; ++address StubRoutines::riscv::_f2l_fixup = NULL; ++address StubRoutines::riscv::_d2i_fixup = NULL; ++address StubRoutines::riscv::_d2l_fixup = NULL; ++address StubRoutines::riscv::_float_sign_mask = NULL; ++address StubRoutines::riscv::_float_sign_flip = NULL; ++address StubRoutines::riscv::_double_sign_mask = NULL; ++address StubRoutines::riscv::_double_sign_flip = NULL; ++address StubRoutines::riscv::_zero_blocks = NULL; ++address StubRoutines::riscv::_compare_long_string_LL = NULL; ++address StubRoutines::riscv::_compare_long_string_UU = NULL; ++address StubRoutines::riscv::_compare_long_string_LU = NULL; ++address StubRoutines::riscv::_compare_long_string_UL = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ll = NULL; ++address StubRoutines::riscv::_string_indexof_linear_uu = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ul = NULL; ++address StubRoutines::riscv::_large_byte_array_inflate = NULL; ++address StubRoutines::riscv::_method_entry_barrier = NULL; + -+ // jvmti support -+ __ notify_method_entry(); ++bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +new file mode 100644 +index 00000000000..51f07819c33 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -0,0 +1,161 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ __ dispatch_next(vtos); ++#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP ++#define CPU_RISCV_STUBROUTINES_RISCV_HPP + -+ // invocation counter overflow -+ if (inc_counter) { -+ if (ProfileInterpreter) { -+ // We have decided to profile this method in the interpreter -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ set_method_data_pointer_for_bcp(); -+ // don't think we need this -+ __ get_method(x11); -+ __ jal(profile_method_continue); -+ } -+ // Handle overflow of counter and compile method -+ __ bind(invocation_counter_overflow); -+ generate_counter_overflow(continue_after_compile); -+ } ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. + -+ return entry_point; ++static bool returns_to_call_stub(address return_pc) { ++ return return_pc == _call_stub_return_address; +} + -+//----------------------------------------------------------------------------- -+// Exceptions ++enum platform_dependent_constants { ++ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) ++}; + -+void TemplateInterpreterGenerator::generate_throw_exception() { -+ // Entry point in previous activation (i.e., if the caller was -+ // interpreted) -+ Interpreter::_rethrow_exception_entry = __ pc(); -+ // Restore sp to interpreter_frame_last_sp even though we are going -+ // to empty the expression stack for the exception processing. -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // x10: exception -+ // x13: return address/pc that threw exception -+ __ restore_bcp(); // xbcp points to call/send -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ reinit_heapbase(); // restore xheapbase as heapbase. -+ __ get_dispatch(); ++class riscv { ++ friend class StubGenerator; + -+ // Entry point for exceptions thrown within interpreter code -+ Interpreter::_throw_exception_entry = __ pc(); -+ // If we came here via a NullPointerException on the receiver of a -+ // method, xthread may be corrupt. 
-+ __ get_method(xmethod); -+ // expression stack is undefined here -+ // x10: exception -+ // xbcp: exception bcp -+ __ verify_oop(x10); -+ __ mv(c_rarg1, x10); ++ private: ++ static address _get_previous_sp_entry; + -+ // expression stack must be empty before entering the VM in case of -+ // an exception -+ __ empty_expression_stack(); -+ // find exception handler address and preserve exception oop -+ __ call_VM(x13, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::exception_handler_for_exception), -+ c_rarg1); ++ static address _f2i_fixup; ++ static address _f2l_fixup; ++ static address _d2i_fixup; ++ static address _d2l_fixup; + -+ // Calculate stack limit -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ static address _float_sign_mask; ++ static address _float_sign_flip; ++ static address _double_sign_mask; ++ static address _double_sign_flip; + -+ // x10: exception handler entry point -+ // x13: preserved exception oop -+ // xbcp: bcp for exception handler -+ __ push_ptr(x13); // push exception which is now the only value on the stack -+ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) ++ static address _zero_blocks; + -+ // If the exception is not handled in the current frame the frame is -+ // removed and the exception is rethrown (i.e. exception -+ // continuation is _rethrow_exception). -+ // -+ // Note: At this point the bci is still the bxi for the instruction -+ // which caused the exception and the expression stack is -+ // empty. Thus, for any VM calls at this point, GC will find a legal -+ // oop map (with empty expression stack). ++ static address _compare_long_string_LL; ++ static address _compare_long_string_LU; ++ static address _compare_long_string_UL; ++ static address _compare_long_string_UU; ++ static address _string_indexof_linear_ll; ++ static address _string_indexof_linear_uu; ++ static address _string_indexof_linear_ul; ++ static address _large_byte_array_inflate; + -+ // -+ // JVMTI PopFrame support -+ // ++ static address _method_entry_barrier; + -+ Interpreter::_remove_activation_preserving_args_entry = __ pc(); -+ __ empty_expression_stack(); -+ // Set the popframe_processing bit in pending_popframe_condition -+ // indicating that we are currently handling popframe, so that -+ // call_VMs that may happen later do not trigger new popframe -+ // handling cycles. -+ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); -+ __ ori(x13, x13, JavaThread::popframe_processing_bit); -+ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ static bool _completed; + -+ { -+ // Check to see whether we are returning to a deoptimized frame. -+ // (The PopFrame call ensures that the caller of the popped frame is -+ // either interpreted or compiled and deoptimizes it if compiled.) -+ // In this case, we can't call dispatch_next() after the frame is -+ // popped, but instead must save the incoming arguments and restore -+ // them after deoptimization has occurred. -+ // -+ // Note that we don't compare the return PC against the -+ // deoptimization blob's unpack entry because of the presence of -+ // adapter frames in C2. 
-+ Label caller_not_deoptimized; -+ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); -+ __ bnez(x10, caller_not_deoptimized); ++ public: + -+ // Compute size of arguments for saving when returning to -+ // deoptimized caller -+ __ get_method(x10); -+ __ ld(x10, Address(x10, Method::const_offset())); -+ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: -+ size_of_parameters_offset()))); -+ __ slli(x10, x10, Interpreter::logStackElementSize); -+ __ restore_locals(); -+ __ sub(xlocals, xlocals, x10); -+ __ add(xlocals, xlocals, wordSize); -+ // Save these arguments -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, -+ Deoptimization:: -+ popframe_preserve_args), -+ xthread, x10, xlocals); ++ static address get_previous_sp_entry() { ++ return _get_previous_sp_entry; ++ } + -+ __ remove_activation(vtos, -+ /* throw_monitor_exception */ false, -+ /* install_monitor_exception */ false, -+ /* notify_jvmdi */ false); ++ static address f2i_fixup() { ++ return _f2i_fixup; ++ } + -+ // Inform deoptimization that it is responsible for restoring -+ // these arguments -+ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); -+ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); ++ static address f2l_fixup() { ++ return _f2l_fixup; ++ } + -+ // Continue in deoptimization handler -+ __ ret(); ++ static address d2i_fixup() { ++ return _d2i_fixup; ++ } + -+ __ bind(caller_not_deoptimized); ++ static address d2l_fixup() { ++ return _d2l_fixup; + } + -+ __ remove_activation(vtos, -+ /* throw_monitor_exception */ false, -+ /* install_monitor_exception */ false, -+ /* notify_jvmdi */ false); ++ static address float_sign_mask() { ++ return _float_sign_mask; ++ } + -+ // Restore the last_sp and null it out -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ static address float_sign_flip() { ++ return _float_sign_flip; ++ } + -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); -+ __ get_dispatch(); ++ static address double_sign_mask() { ++ return _double_sign_mask; ++ } + -+ // The method data pointer was incremented already during -+ // call profiling. We have to restore the mdp for the current bcp. -+ if (ProfileInterpreter) { -+ __ set_method_data_pointer_for_bcp(); ++ static address double_sign_flip() { ++ return _double_sign_flip; + } + -+ // Clear the popframe condition flag -+ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); -+ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); ++ static address zero_blocks() { ++ return _zero_blocks; ++ } + -+#if INCLUDE_JVMTI -+ { -+ Label L_done; ++ static address compare_long_string_LL() { ++ return _compare_long_string_LL; ++ } + -+ __ lbu(t0, Address(xbcp, 0)); -+ __ mv(t1, Bytecodes::_invokestatic); -+ __ bne(t1, t0, L_done); ++ static address compare_long_string_LU() { ++ return _compare_long_string_LU; ++ } + -+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. -+ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. 
++ static address compare_long_string_UL() { ++ return _compare_long_string_UL; ++ } + -+ __ ld(c_rarg0, Address(xlocals, 0)); -+ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); ++ static address compare_long_string_UU() { ++ return _compare_long_string_UU; ++ } + -+ __ beqz(x10, L_done); ++ static address string_indexof_linear_ul() { ++ return _string_indexof_linear_ul; ++ } + -+ __ sd(x10, Address(esp, 0)); -+ __ bind(L_done); ++ static address string_indexof_linear_ll() { ++ return _string_indexof_linear_ll; + } -+#endif // INCLUDE_JVMTI + -+ // Restore machine SP -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slliw(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ static address string_indexof_linear_uu() { ++ return _string_indexof_linear_uu; ++ } + -+ __ dispatch_next(vtos); -+ // end of PopFrame support ++ static address large_byte_array_inflate() { ++ return _large_byte_array_inflate; ++ } + -+ Interpreter::_remove_activation_entry = __ pc(); ++ static address method_entry_barrier() { ++ return _method_entry_barrier; ++ } + -+ // preserve exception over this code sequence -+ __ pop_ptr(x10); -+ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); -+ // remove the activation (without doing throws on illegalMonitorExceptions) -+ __ remove_activation(vtos, false, true, false); -+ // restore exception -+ __ get_vm_result(x10, xthread); ++ static bool complete() { ++ return _completed; ++ } + -+ // In between activations - previous activation type unknown yet -+ // compute continuation point - the continuation point expects the -+ // following registers set up: -+ // -+ // x10: exception -+ // ra: return address/pc that threw exception -+ // sp: expression stack of caller -+ // fp: fp of caller -+ // FIXME: There's no point saving RA here because VM calls don't trash it -+ __ sub(sp, sp, 2 * wordSize); -+ __ sd(x10, Address(sp, 0)); // save exception -+ __ sd(ra, Address(sp, wordSize)); // save return address -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, ra); -+ __ mv(x11, x10); // save exception handler -+ __ ld(x10, Address(sp, 0)); // restore exception -+ __ ld(ra, Address(sp, wordSize)); // restore return address -+ __ add(sp, sp, 2 * wordSize); -+ // We might be returning to a deopt handler that expects x13 to -+ // contain the exception pc -+ __ mv(x13, ra); -+ // Note that an "issuing PC" is actually the next PC after the call -+ __ jr(x11); // jump to exception -+ // handler of caller -+} -+ -+// -+// JVMTI ForceEarlyReturn support -+// -+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { -+ address entry = __ pc(); -+ -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ empty_expression_stack(); -+ __ load_earlyret_value(state); -+ -+ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); -+ -+ // Clear the earlyret state -+ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); -+ __ sd(zr, cond_addr); -+ -+ __ remove_activation(state, -+ false, /* throw_monitor_exception */ -+ false, /* install_monitor_exception */ -+ true); /* notify_jvmdi */ -+ __ ret(); -+ -+ return entry; -+} 
-+// end of ForceEarlyReturn support -+ -+//----------------------------------------------------------------------------- -+// Helper for vtos entry point generation -+ -+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, -+ address& bep, -+ address& cep, -+ address& sep, -+ address& aep, -+ address& iep, -+ address& lep, -+ address& fep, -+ address& dep, -+ address& vep) { -+ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); -+ Label L; -+ aep = __ pc(); __ push_ptr(); __ j(L); -+ fep = __ pc(); __ push_f(); __ j(L); -+ dep = __ pc(); __ push_d(); __ j(L); -+ lep = __ pc(); __ push_l(); __ j(L); -+ bep = cep = sep = -+ iep = __ pc(); __ push_i(); -+ vep = __ pc(); -+ __ bind(L); -+ generate_and_dispatch(t); -+} -+ -+//----------------------------------------------------------------------------- -+ -+// Non-product code -+#ifndef PRODUCT -+address TemplateInterpreterGenerator::generate_trace_code(TosState state) { -+ address entry = __ pc(); -+ -+ __ push_reg(ra); -+ __ push(state); -+ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); -+ __ mv(c_rarg2, x10); // Pass itos -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); -+ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); -+ __ pop(state); -+ __ pop_reg(ra); -+ __ ret(); // return from result handler -+ -+ return entry; -+} -+ -+void TemplateInterpreterGenerator::count_bytecode() { -+ __ push_reg(t0); -+ __ push_reg(x10); -+ __ mv(x10, (address) &BytecodeCounter::_counter_value); -+ __ mv(t0, 1); -+ __ amoadd_d(zr, x10, t0, Assembler::aqrl); -+ __ pop_reg(x10); -+ __ pop_reg(t0); -+} -+ -+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } -+ -+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } -+ -+void TemplateInterpreterGenerator::trace_bytecode(Template* t) { -+ // Call a little run-time stub to avoid blow-up for each bytecode. -+ // The run-time runtime saves the right registers, depending on -+ // the tosca in-state for the given template. -+ -+ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); -+ __ jal(Interpreter::trace_code(t->tos_in())); -+ __ reinit_heapbase(); -+} -+ -+void TemplateInterpreterGenerator::stop_interpreter_at() { -+ Label L; -+ __ push_reg(t0); -+ __ mv(t0, (address) &BytecodeCounter::_counter_value); -+ __ ld(t0, Address(t0)); -+ __ mv(t1, StopInterpreterAt); -+ __ bne(t0, t1, L); -+ __ ebreak(); -+ __ bind(L); -+ __ pop_reg(t0); -+} ++ static void set_completed() { ++ _completed = true; ++ } ++}; + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 000000000..8e6e7dee5 +index 00000000000..6537b2dbd94 --- /dev/null -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,4028 @@ ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -0,0 +1,1794 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -49225,6468 +49961,5737 @@ index 000000000..8e6e7dee5 +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/bytecodeTracer.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" -+#include "memory/universe.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/arrayOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" -+#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include + -+#define __ _masm-> -+ -+// Platform-dependent initialization -+ -+void TemplateTable::pd_initialize() { -+ // No riscv specific initialization -+} -+ -+// Address computation: local variables ++#ifndef PRODUCT ++#include "oops/method.hpp" ++#endif // !PRODUCT + -+static inline Address iaddress(int n) { -+ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); -+} ++// Size of interpreter code. Increase if too small. Interpreter will ++// fail with a guarantee ("not enough space for interpreter generation"); ++// if too small. ++// Run with +PrintInterpreter to get the VM to print out the size. 
++// Max size with JVMTI ++int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; + -+static inline Address laddress(int n) { -+ return iaddress(n + 1); -+} ++#define __ _masm-> + -+static inline Address faddress(int n) { -+ return iaddress(n); -+} ++//----------------------------------------------------------------------------- + -+static inline Address daddress(int n) { -+ return laddress(n); -+} ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); + -+static inline Address aaddress(int n) { -+ return iaddress(n); -+} ++ __ andi(esp, esp, -16); ++ __ mv(c_rarg3, esp); ++ // xmethod ++ // xlocals ++ // c_rarg3: first stack arg - wordSize ++ // adjust sp + -+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ _masm->shadd(temp, r, xlocals, temp, 3); -+ return Address(temp, 0); -+} ++ __ addi(sp, c_rarg3, -18 * wordSize); ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, 0)); + -+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ _masm->shadd(temp, r, xlocals, temp, 3); -+ return Address(temp, Interpreter::local_offset_in_bytes(1));; -+} ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ xmethod, xlocals, c_rarg3); + -+static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return iaddress(r, temp, _masm); -+} ++ // x10: result handler + -+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return laddress(r, temp, _masm); -+} ++ // Stack layout: ++ // sp: return address <- sp ++ // 1 garbage ++ // 8 integer args (if static first is unused) ++ // 1 float/double identifiers ++ // 8 double args ++ // stack args <- esp ++ // garbage ++ // expression stack bottom ++ // bcp (NULL) ++ // ... + -+static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return iaddress(r, temp, _masm); -+} ++ // Restore ra ++ __ ld(ra, Address(sp, 0)); ++ __ addi(sp, sp , 2 * wordSize); + -+// At top of Java expression stack which may be different than esp(). It -+// isn't for category 1 objects. -+static inline Address at_tos () { -+ return Address(esp, Interpreter::expr_offset_in_bytes(0)); -+} ++ // Do FP first so we can use c_rarg3 as temp ++ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers + -+static inline Address at_tos_p1() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(1)); -+} ++ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { ++ const FloatRegister r = g_FPArgReg[i]; ++ Label d, done; + -+static inline Address at_tos_p2() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(2)); -+} ++ __ andi(t0, c_rarg3, 1UL << i); ++ __ bnez(t0, d); ++ __ flw(r, Address(sp, (10 + i) * wordSize)); ++ __ j(done); ++ __ bind(d); ++ __ fld(r, Address(sp, (10 + i) * wordSize)); ++ __ bind(done); ++ } + -+static inline Address at_tos_p3() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(3)); -+} ++ // c_rarg0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. 
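The float/double identifier word tested in the loop above is a plain bitmask: bit i set means C floating-point argument register i carries a double (reload with fld), clear means a float (reload with flw). A standalone C++ sketch of that width selection follows (hypothetical helper names, not HotSpot code); the loop after it in the generated handler then reloads the integer argument registers from their 8-byte slots, starting at 1 because c_rarg0 already holds the result handler.

#include <cstdint>
#include <cstddef>

// Width, in bytes, of the load used for FP argument register i, given the
// identifier word written by the slow signature handler:
//   bit i == 1  ->  argument i is a double (fld, 8 bytes)
//   bit i == 0  ->  argument i is a float  (flw, 4 bytes)
// Illustrative helper only, not part of the patch.
inline size_t fp_load_width(uint32_t identifiers, int i) {
  return (identifiers & (1u << i)) ? 8 : 4;
}

// Example: identifiers == 0b0101 -> args 0 and 2 are doubles, args 1 and 3 are floats,
// so fp_load_width(0b0101, 0) == 8 and fp_load_width(0b0101, 1) == 4.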
++ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { ++ const Register rm = g_INTArgReg[i]; ++ __ ld(rm, Address(sp, i * wordSize)); ++ } + -+static inline Address at_tos_p4() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(4)); -+} ++ __ addi(sp, sp, 18 * wordSize); ++ __ ret(); + -+static inline Address at_tos_p5() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(5)); ++ return entry; +} + -+// Miscelaneous helper routines -+// Store an oop (or NULL) at the Address described by obj. -+// If val == noreg this means store a NULL -+static void do_oop_store(InterpreterMacroAssembler* _masm, -+ Address dst, -+ Register val, -+ DecoratorSet decorators) { -+ assert(val == noreg || val == x10, "parameter is just for looks"); -+ __ store_heap_oop(dst, val, x29, x11, x13, decorators); -+} ++// Various method entries ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ // xmethod: Method* ++ // x30: sender sp ++ // esp: args + -+static void do_oop_load(InterpreterMacroAssembler* _masm, -+ Address src, -+ Register dst, -+ DecoratorSet decorators) { -+ __ load_heap_oop(dst, src, x7, x11, decorators); -+} ++ if (!InlineIntrinsics) { ++ return NULL; // Generate a vanilla entry ++ } + -+Address TemplateTable::at_bcp(int offset) { -+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); -+ return Address(xbcp, offset); -+} ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. + -+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, -+ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, -+ int byte_no) -+{ -+ if (!RewriteBytecodes) { return; } -+ Label L_patch_done; ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- esp ++ // [ arg ] ++ // retaddr in ra + -+ switch (bc) { -+ case Bytecodes::_fast_aputfield: // fall through -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_dputfield: // fall through -+ case Bytecodes::_fast_fputfield: // fall through -+ case Bytecodes::_fast_iputfield: // fall through -+ case Bytecodes::_fast_lputfield: // fall through -+ case Bytecodes::_fast_sputfield: { -+ // We skip bytecode quickening for putfield instructions when -+ // the put_code written to the constant pool cache is zero. -+ // This is required so that every execution of this instruction -+ // calls out to InterpreterRuntime::resolve_get_put to do -+ // additional, required work. 
-+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); -+ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); -+ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); -+ __ mv(bc_reg, bc); -+ __ beqz(temp_reg, L_patch_done); ++ address fn = NULL; ++ address entry_point = NULL; ++ Register continuation = ra; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fabs_d(f10, f10); ++ __ mv(sp, x30); // Restore caller's SP + break; -+ } -+ default: -+ assert(byte_no == -1, "sanity"); -+ // the pair bytecodes have already done the load. -+ if (load_bc_into_bc_reg) { -+ __ mv(bc_reg, bc); ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fsqrt_d(f10, f10); ++ __ mv(sp, x30); ++ break; ++ case Interpreter::java_lang_math_sin : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_cos : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_tan : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log10 : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ mv(x9, ra); ++ continuation = x9; ++ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp)); ++ __ mv(sp, x30); ++ if (StubRoutines::dpow() == 
NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f12, Address(esp)); ++ __ fmadd_d(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ flw(f11, Address(esp, Interpreter::stackElementSize)); ++ __ flw(f12, Address(esp)); ++ __ fmadd_s(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP + } ++ break; ++ default: ++ ; + } -+ -+ if (JvmtiExport::can_post_breakpoint()) { -+ Label L_fast_patch; -+ // if a breakpoint is present we can't rewrite the stream directly -+ __ load_unsigned_byte(temp_reg, at_bcp(0)); -+ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. -+ __ bnez(temp_reg, L_fast_patch); -+ // Let breakpoint table handling rewrite to quicker bytecode -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); -+ __ j(L_patch_done); -+ __ bind(L_fast_patch); ++ if (entry_point != NULL) { ++ __ jr(continuation); + } + -+#ifdef ASSERT -+ Label L_okay; -+ __ load_unsigned_byte(temp_reg, at_bcp(0)); -+ __ beq(temp_reg, bc_reg, L_okay); -+ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); -+ __ beqz(temp_reg, L_okay); -+ __ stop("patching the wrong bytecode"); -+ __ bind(L_okay); -+#endif -+ -+ // patch bytecode -+ __ sb(bc_reg, at_bcp(0)); -+ __ bind(L_patch_done); ++ return entry_point; +} + -+// Individual instructions ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ // xmethod: Method* ++ // x30: sender SP + -+void TemplateTable::nop() { -+ transition(vtos, vtos); -+ // nothing to do -+} ++ address entry_point = __ pc(); + -+void TemplateTable::shouldnotreachhere() { -+ transition(vtos, vtos); -+ __ stop("should not reach here bytecode"); -+} ++ // abstract method entry + -+void TemplateTable::aconst_null() -+{ -+ transition(vtos, atos); -+ __ mv(x10, zr); -+} ++ // pop return address, reset last_sp to NULL ++ __ empty_expression_stack(); ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + -+void TemplateTable::iconst(int value) -+{ -+ transition(vtos, itos); -+ __ mv(x10, value); -+} ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), ++ xmethod); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); + -+void TemplateTable::lconst(int value) -+{ -+ transition(vtos, ltos); -+ __ mv(x10, value); ++ return entry_point; +} + -+void TemplateTable::fconst(int value) -+{ -+ transition(vtos, ftos); -+ static float fBuf[2] = {1.0, 2.0}; -+ __ mv(t0, (intptr_t)fBuf); -+ switch (value) { -+ case 0: -+ __ fmv_w_x(f10, zr); -+ break; -+ case 1: -+ __ flw(f10, t0, 0); -+ break; -+ case 2: -+ __ flw(f10, t0, sizeof(float)); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); + -+void TemplateTable::dconst(int value) -+{ -+ transition(vtos, dtos); -+ static double dBuf[2] = {1.0, 2.0}; -+ __ mv(t0, (intptr_t)dBuf); -+ switch (value) { -+ case 0: -+ __ fmv_d_x(f10, zr); -+ break; -+ case 1: -+ __ fld(f10, t0, 0); -+ break; -+ case 2: -+ __ fld(f10, t0, sizeof(double)); -+ break; -+ default: -+ ShouldNotReachHere(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ __ mv(t1, sp); ++ // maximal sp for current fp (stack grows negative) ++ // check if frame is complete ++ __ bge(t0, t1, L); ++ __ stop ("interpreter frame not set up"); ++ __ bind(L); + } -+} ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); + -+void TemplateTable::bipush() -+{ -+ transition(vtos, itos); -+ __ load_signed_byte(x10, at_bcp(1)); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; +} + -+void TemplateTable::sipush() -+{ -+ transition(vtos, itos); -+ __ load_unsigned_short(x10, at_bcp(1)); -+ __ revb_w_w(x10, x10); -+ __ sraiw(x10, x10, 16); -+} ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters + -+void TemplateTable::ldc(bool wide) -+{ -+ transition(vtos, vtos); -+ Label call_ldc, notFloat, notClass, notInt, Done; ++ // convention: expect aberrant index in register x11 ++ __ zero_extend(c_rarg2, x11, 32); ++ // convention: expect array in register x13 ++ __ mv(c_rarg1, x13); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ArrayIndexOutOfBoundsException), ++ c_rarg1, c_rarg2); ++ return entry; ++} + -+ if (wide) { -+ __ get_unsigned_2_byte_index_at_bcp(x11, 1); -+ } else { -+ __ load_unsigned_byte(x11, at_bcp(1)); -+ } -+ __ get_cpool_and_tags(x12, x10); ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); + -+ const int base_offset = ConstantPool::header_size() * wordSize; -+ const int tags_offset = Array::base_offset_in_bytes(); ++ // object is at TOS ++ __ pop_reg(c_rarg1); + -+ // get type -+ __ addi(x13, x11, tags_offset); -+ __ add(x13, x10, x13); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x13, Address(x13, 0)); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); + -+ // unresolved class - get the resolved class -+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); -+ __ beq(x13, t1, call_ldc); ++ __ 
call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ClassCastException), ++ c_rarg1); ++ return entry; ++} + -+ // unresolved class in error state - call into runtime to throw the error -+ // from the first resolution attempt -+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); -+ __ beq(x13, t1, call_ldc); ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ if (pass_oop) { ++ // object is at TOS ++ __ pop_reg(c_rarg2); ++ } ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ la(c_rarg1, Address((address)name)); ++ if (pass_oop) { ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ create_klass_exception), ++ c_rarg1, c_rarg2); ++ } else { ++ // kind of lame ExternalAddress can't take NULL because ++ // external_word_Relocation will assert. ++ if (message != NULL) { ++ __ la(c_rarg2, Address((address)message)); ++ } else { ++ __ mv(c_rarg2, NULL_WORD); ++ } ++ __ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), ++ c_rarg1, c_rarg2); ++ } ++ // throw exception ++ __ j(address(Interpreter::throw_exception_entry())); ++ return entry; ++} + -+ // resolved class - need to call vm to get java mirror of the class -+ __ mv(t1, (u1)JVM_CONSTANT_Class); -+ __ bne(x13, t1, notClass); ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ address entry = __ pc(); + -+ __ bind(call_ldc); -+ __ mv(c_rarg1, wide); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); -+ __ push_ptr(x10); -+ __ verify_oop(x10); -+ __ j(Done); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that esp is now tos until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); + -+ __ bind(notClass); -+ __ mv(t1, (u1)JVM_CONSTANT_Float); -+ __ bne(x13, t1, notFloat); ++ if (state == atos) { ++ Register obj = x10; ++ Register mdp = x11; ++ Register tmp = x12; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ profile_return_type(mdp, obj, tmp); ++ } + -+ // ftos -+ __ shadd(x11, x11, x12, x11, 3); -+ __ flw(f10, Address(x11, base_offset)); -+ __ push_f(f10); -+ __ j(Done); ++ // Pop N words from the stack ++ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); ++ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); + -+ __ bind(notFloat); ++ __ shadd(esp, x11, esp, t0, 3); + -+ __ mv(t1, (u1)JVM_CONSTANT_Integer); -+ __ bne(x13, t1, notInt); ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, ++ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ // itos -+ __ shadd(x11, x11, x12, x11, 3); -+ __ lw(x10, Address(x11, base_offset)); -+ __ 
push_i(x10); -+ __ j(Done); ++ __ check_and_handle_popframe(xthread); ++ __ check_and_handle_earlyret(xthread); + -+ __ bind(notInt); -+ condy_helper(Done); ++ __ get_dispatch(); ++ __ dispatch_next(state, step); + -+ __ bind(Done); ++ return entry; +} + -+// Fast path for caching oop constants. -+void TemplateTable::fast_aldc(bool wide) -+{ -+ transition(vtos, atos); ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); + -+ const Register result = x10; -+ const Register tmp = x11; -+ const Register rarg = x12; ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ const int index_size = wide ? sizeof(u2) : sizeof(u1); ++ // Restore expression stack pointer ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // NULL last_sp until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + -+ Label resolved; ++ // handle exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } + -+ // We are resolved if the resolved reference cache entry contains a -+ // non-null object (String, MethodType, etc.) -+ assert_different_registers(result, tmp); -+ __ get_cache_index_at_bcp(tmp, 1, index_size); -+ __ load_resolved_reference_at_index(result, tmp); -+ __ bnez(result, resolved); ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} + -+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ address entry = __ pc(); ++ if (type == T_OBJECT) { ++ // retrieve result from frame ++ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // and verify it ++ __ verify_oop(x10); ++ } else { ++ __ cast_primitive_type(type, x10); ++ } + -+ // first time invocation - must resolve first -+ __ mv(rarg, (int)bytecode()); -+ __ call_VM(result, entry, rarg); ++ __ ret(); // return from result handler ++ return entry; ++} + -+ __ bind(resolved); ++address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, ++ address runtime_entry) { ++ assert_cond(runtime_entry != NULL); ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ fence(0xf, 0xf); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} + -+ { // Check for the null sentinel. -+ // If we just called the VM, it already did the mapping for us, -+ // but it's harmless to retry. -+ Label notNull; ++// Helpers for commoning out cases in the various type of method entries. 
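As an aside on generate_return_entry_for above: the invoke's ConstantPoolCacheEntry flags word encodes the callee's parameter size in its low bits, and the return entry bumps the expression stack pointer by that many 8-byte slots, which is what the andi/shadd pair does. A small C++ sketch of that arithmetic; the mask value here is an assumption for illustration, not taken from the real headers.

#include <cstdint>

// Assumed encoding: the low 8 bits of the cpCache entry flags hold the parameter
// size, standing in for ConstantPoolCacheEntry::parameter_size_mask.
static const uint64_t kParameterSizeMask = 0xff;  // assumption for illustration
static const int      kLogBytesPerSlot   = 3;     // 8-byte expression stack slots

// New expression stack pointer after popping the callee's arguments
// (the expression stack grows down, so popping means adding).
inline uint64_t pop_parameters(uint64_t esp, uint64_t cp_cache_flags) {
  uint64_t param_size = cp_cache_flags & kParameterSizeMask;
  return esp + (param_size << kLogBytesPerSlot);
}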
++// + -+ // Stash null_sentinel address to get its value later -+ int32_t offset = 0; -+ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); -+ __ ld(tmp, Address(rarg, offset)); -+ __ bne(result, tmp, notNull); -+ __ mv(result, zr); // NULL object reference -+ __ bind(notNull); -+ } + -+ if (VerifyOops) { -+ // Safe to call with 0 result -+ __ verify_oop(result); ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// xmethod: method ++// ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); + } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); +} + -+void TemplateTable::ldc2_w() -+{ -+ transition(vtos, vtos); -+ Label notDouble, notLong, Done; -+ __ get_unsigned_2_byte_index_at_bcp(x10, 1); ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ __ mv(c_rarg1, zr); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); ++ __ j(do_continue); ++} + -+ __ get_cpool_and_tags(x11, x12); -+ const int base_offset = ConstantPool::header_size() * wordSize; -+ const int tags_offset = Array::base_offset_in_bytes(); ++// See if we've got enough room on the stack for locals plus overhead ++// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError ++// without going through the signal handler, i.e., reserved and yellow zones ++// will not be made usable. The shadow zone must suffice to handle the ++// overflow. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// x13: number of additional locals this frame needs (what we must check) ++// xmethod: Method* ++// ++// Kills: ++// x10 ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { + -+ // get type -+ __ add(x12, x12, x10); -+ __ load_unsigned_byte(x12, Address(x12, tags_offset)); -+ __ mv(t1, JVM_CONSTANT_Double); -+ __ bne(x12, t1, notDouble); ++ // monitor entry size: see picture of stack set ++ // (generate_method_entry) and frame_amd64.hpp ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // dtos -+ __ shadd(x12, x10, x11, x12, 3); -+ __ fld(f10, Address(x12, base_offset)); -+ __ push_d(f10); -+ __ j(Done); ++ // total overhead size: entry_size + (saved fp through expr stack ++ // bottom). be sure to change this if you add/subtract anything ++ // to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + -+ __ bind(notDouble); -+ __ mv(t1, (int)JVM_CONSTANT_Long); -+ __ bne(x12, t1, notLong); ++ const int page_size = os::vm_page_size(); + -+ // ltos -+ __ shadd(x10, x10, x11, x10, 3); -+ __ ld(x10, Address(x10, base_offset)); -+ __ push_l(x10); -+ __ j(Done); ++ Label after_frame_check; + -+ __ bind(notLong); -+ condy_helper(Done); -+ __ bind(Done); ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bleu(x13, t0, after_frame_check); + -+} ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone + -+void TemplateTable::condy_helper(Label& Done) -+{ -+ const Register obj = x10; -+ const Register rarg = x11; -+ const Register flags = x12; -+ const Register off = x13; ++ // locals + overhead, in bytes ++ __ mv(x10, overhead_size); ++ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. + -+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); ++ __ ld(t0, stack_limit); + -+ __ mv(rarg, (int) bytecode()); -+ __ call_VM(obj, entry, rarg); ++#ifdef ASSERT ++ Label limit_okay; ++ // Verify that thread stack limit is non-zero. ++ __ bnez(t0, limit_okay); ++ __ stop("stack overflow limit is zero"); ++ __ bind(limit_okay); ++#endif + -+ __ get_vm_result_2(flags, xthread); ++ // Add stack limit to locals. ++ __ add(x10, x10, t0); + -+ // VMr = obj = base address to find primitive value to push -+ // VMr2 = flags = (tos, off) using format of CPCE::_flags -+ __ mv(off, flags); -+ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); -+ __ andrw(off, off, t0); ++ // Check against the current stack bottom. ++ __ bgtu(sp, x10, after_frame_check); + -+ __ add(off, obj, off); -+ const Address field(off, 0); // base + R---->base + offset ++ // Remove the incoming args, peeling the machine SP back to where it ++ // was in the caller. This is not strictly necessary, but unless we ++ // do so the stack frame may have a garbage FP; this ensures a ++ // correct call stack that we can always unwind. The ANDI should be ++ // unnecessary because the sender SP in x30 is always aligned, but ++ // it doesn't hurt. 
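The frame-size check sketched in the comments above reduces to a single comparison: the new frame fits only if sp still lies above the thread's stack_overflow_limit plus the space needed for the additional locals and the fixed frame overhead (the stack grows toward lower addresses). A minimal C++ sketch of that predicate, with illustrative names and types; the code that follows peels sp back to the aligned sender SP before jumping to the shared StackOverflowError stub when the check fails.

#include <cstdint>
#include <cstddef>

// true  -> the frame fits, continue with the normal method entry
// false -> it would cross the overflow limit, so throw StackOverflowError
inline bool frame_fits(uintptr_t sp,                 // current stack pointer
                       uintptr_t overflow_limit,     // JavaThread::stack_overflow_limit()
                       size_t    extra_locals_bytes, // additional locals * wordSize
                       size_t    overhead_bytes) {   // monitor entry + fixed frame overhead
  // Stack grows down, so sp must stay strictly above limit + required space
  // (this mirrors the unsigned bgtu comparison in the generated code).
  return sp > overflow_limit + extra_locals_bytes + overhead_bytes;
}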
++ __ andi(sp, x30, -16); + -+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + -+ switch (bytecode()) { -+ case Bytecodes::_ldc: // fall through -+ case Bytecodes::_ldc_w: { -+ // tos in (itos, ftos, stos, btos, ctos, ztos) -+ Label notInt, notFloat, notShort, notByte, notChar, notBool; -+ __ mv(t1, itos); -+ __ bne(flags, t1, notInt); -+ // itos -+ __ lw(x10, field); -+ __ push(itos); -+ __ j(Done); ++ // all done with frame size check ++ __ bind(after_frame_check); ++} + -+ __ bind(notInt); -+ __ mv(t1, ftos); -+ __ bne(flags, t1, notFloat); -+ // ftos -+ __ load_float(field); -+ __ push(ftos); -+ __ j(Done); ++// Allocate monitor and lock method (asm interpreter) ++// ++// Args: ++// xmethod: Method* ++// xlocals: locals ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) ++// t0, t1 (temporary regs) ++void TemplateInterpreterGenerator::lock_method() { ++ // synchronize method ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ __ bind(notFloat); -+ __ mv(t1, stos); -+ __ bne(flags, t1, notShort); -+ // stos -+ __ load_signed_short(x10, field); -+ __ push(stos); -+ __ j(Done); -+ -+ __ bind(notShort); -+ __ mv(t1, btos); -+ __ bne(flags, t1, notByte); -+ // btos -+ __ load_signed_byte(x10, field); -+ __ push(btos); -+ __ j(Done); -+ -+ __ bind(notByte); -+ __ mv(t1, ctos); -+ __ bne(flags, t1, notChar); -+ // ctos -+ __ load_unsigned_short(x10, field); -+ __ push(ctos); -+ __ j(Done); ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); ++#endif // ASSERT + -+ __ bind(notChar); -+ __ mv(t1, ztos); -+ __ bne(flags, t1, notBool); -+ // ztos -+ __ load_signed_byte(x10, field); -+ __ push(ztos); -+ __ j(Done); ++ // get synchronization object ++ { ++ Label done; ++ __ lwu(x10, access_flags); ++ __ andi(t0, x10, JVM_ACC_STATIC); ++ // get receiver (assume this is frequent case) ++ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); ++ __ beqz(t0, done); ++ __ load_mirror(x10, xmethod); + -+ __ bind(notBool); -+ break; ++#ifdef ASSERT ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("synchronization object is NULL"); ++ __ bind(L); + } ++#endif // ASSERT + -+ case Bytecodes::_ldc2_w: { -+ Label notLong, notDouble; -+ __ mv(t1, ltos); -+ __ bne(flags, t1, notLong); -+ // ltos -+ __ ld(x10, field); -+ __ push(ltos); -+ __ j(Done); ++ __ bind(done); ++ } + -+ __ bind(notLong); -+ __ mv(t1, dtos); -+ __ bne(flags, t1, notDouble); -+ // dtos -+ __ load_double(field); -+ __ push(dtos); -+ __ j(Done); ++ // add space for monitor & lock ++ __ add(sp, sp, - entry_size); // add space for a monitor entry ++ __ add(esp, esp, - entry_size); ++ __ mv(t0, esp); ++ __ sd(t0, monitor_block_top); // set new monitor block top ++ // store object ++ __ sd(x10, Address(esp, 
BasicObjectLock::obj_offset_in_bytes())); ++ __ mv(c_rarg1, esp); // object address ++ __ lock_object(c_rarg1); ++} + -+ __ bind(notDouble); -+ break; -+ } ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++// ++// Args: ++// ra: return address ++// xmethod: Method* ++// xlocals: pointer to locals ++// xcpool: cp cache ++// stack_pointer: previous sp ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ // initialize fixed part of activation frame ++ if (native_call) { ++ __ add(esp, sp, - 14 * wordSize); ++ __ mv(xbcp, zr); ++ __ add(sp, sp, - 14 * wordSize); ++ // add 2 zero-initialized slots for native calls ++ __ sd(zr, Address(sp, 13 * wordSize)); ++ __ sd(zr, Address(sp, 12 * wordSize)); ++ } else { ++ __ add(esp, sp, - 12 * wordSize); ++ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod ++ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ add(sp, sp, - 12 * wordSize); ++ } ++ __ sd(xbcp, Address(sp, wordSize)); ++ __ sd(esp, Address(sp, 0)); + -+ default: -+ ShouldNotReachHere(); ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(t0, Address(xmethod, Method::method_data_offset())); ++ __ beqz(t0, method_data_continue); ++ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); ++ __ bind(method_data_continue); + } + -+ __ stop("bad ldc/condy"); -+} ++ __ sd(xmethod, Address(sp, 7 * wordSize)); ++ __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); + -+void TemplateTable::locals_index(Register reg, int offset) -+{ -+ __ lbu(reg, at_bcp(offset)); -+ __ neg(reg, reg); -+} ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); + -+void TemplateTable::iload() { -+ iload_internal(); -+} ++ __ ld(xcpool, Address(xmethod, Method::const_offset())); ++ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); ++ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); ++ __ sd(xcpool, Address(sp, 3 * wordSize)); ++ __ sd(xlocals, Address(sp, 2 * wordSize)); + -+void TemplateTable::nofast_iload() { -+ iload_internal(may_not_rewrite); -+} ++ __ sd(ra, Address(sp, 11 * wordSize)); ++ __ sd(fp, Address(sp, 10 * wordSize)); ++ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp + -+void TemplateTable::iload_internal(RewriteControl rc) { -+ transition(vtos, itos); -+ if (RewriteFrequentPairs && rc == may_rewrite) { -+ Label rewrite, done; -+ const Register bc = x14; ++ // set sender sp ++ // leave last_sp as null ++ __ sd(x30, Address(sp, 9 * wordSize)); ++ __ sd(zr, Address(sp, 8 * wordSize)); + -+ // get next bytecode -+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // Move SP out of the way ++ if (!native_call) { ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ slli(t0, t0, 3); ++ __ sub(t0, sp, t0); ++ __ andi(sp, t0, -16); ++ } ++} + -+ // if _iload, wait to rewrite to iload2. We only want to rewrite the -+ // last two iloads in a pair. Comparing against fast_iload means that -+ // the next bytecode is neither an iload or a caload, and therefore -+ // an iload pair. 
-+ __ mv(t1, Bytecodes::_iload); -+ __ beq(x11, t1, done); ++// End of helpers + -+ // if _fast_iload rewrite to _fast_iload2 -+ __ mv(t1, Bytecodes::_fast_iload); -+ __ mv(bc, Bytecodes::_fast_iload2); -+ __ beq(x11, t1, rewrite); ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// + -+ // if _caload rewrite to _fast_icaload -+ __ mv(t1, Bytecodes::_caload); -+ __ mv(bc, Bytecodes::_fast_icaload); -+ __ beq(x11, t1, rewrite); ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // xmethod: Method* ++ // x30: senderSP must preserve for slow path, set SP to it on fast path + -+ // else rewrite to _fast_iload -+ __ mv(bc, Bytecodes::_fast_iload); ++ // ra is live. It must be saved around calls. + -+ // rewrite -+ // bc: new bytecode -+ __ bind(rewrite); -+ patch_bytecode(Bytecodes::_iload, bc, x11, false); -+ __ bind(done); ++ address entry = __ pc(); + -+ } ++ const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ guarantee(referent_offset > 0, "referent offset not initialized"); + -+ // do iload, get the local value into tos -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ Label slow_path; ++ const Register local_0 = c_rarg0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld(local_0, Address(esp, 0)); ++ __ beqz(local_0, slow_path); + -+void TemplateTable::fast_iload2() -+{ -+ transition(vtos, itos); -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+ __ push(itos); -+ locals_index(x11, 3); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ __ mv(x9, x30); // Move senderSP to a callee-saved register + -+void TemplateTable::fast_iload() -+{ -+ transition(vtos, itos); -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ // Load the value of the referent field. 
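In outline, the Reference.get fast path being generated here is the pseudo-C++ below: bail out to the regular entry when the receiver is null, otherwise load the referent through the GC access barrier so an SATB collector can log it, then restore the caller's SP and return. The barrier helper is a hypothetical stand-in for the BarrierSetAssembler::load_at call in the code that follows, not real HotSpot API.

// Illustrative outline only.
typedef void* oop;

// Stand-in for a load decorated with IN_HEAP | ON_WEAK_OOP_REF; a real barrier
// may also record the value for a SATB collector before handing it back.
static oop load_oop_with_weak_barrier(oop base, int offset) {
  return *reinterpret_cast<oop*>(reinterpret_cast<char*>(base) + offset);
}

oop reference_get_fast_path(oop receiver, int referent_offset, bool* take_slow_path) {
  if (receiver == nullptr) {      // null receiver: let the normal entry raise the NPE
    *take_slow_path = true;
    return nullptr;
  }
  *take_slow_path = false;
  return load_oop_with_weak_barrier(receiver, referent_offset);
}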
++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); + -+void TemplateTable::lload() -+{ -+ transition(vtos, ltos); -+ __ lbu(x11, at_bcp(1)); -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ // areturn ++ __ andi(sp, x9, -16); // done with stack ++ __ ret(); + -+void TemplateTable::fload() -+{ -+ transition(vtos, ftos); -+ locals_index(x11); -+ __ flw(f10, faddress(x11, t0, _masm)); ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; +} + -+void TemplateTable::dload() -+{ -+ transition(vtos, dtos); -+ __ lbu(x11, at_bcp(1)); -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ // TODO: Unimplemented generate_CRC32_update_entry ++ return 0; +} + -+void TemplateTable::aload() -+{ -+ transition(vtos, atos); -+ locals_index(x11); -+ __ ld(x10, iaddress(x11, x10, _masm)); -+ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ // TODO: Unimplemented generate_CRC32_updateBytes_entry ++ return 0; +} + -+void TemplateTable::locals_index_wide(Register reg) { -+ __ lhu(reg, at_bcp(2)); -+ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend -+ __ neg(reg, reg); ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ // TODO: Unimplemented generate_CRC32C_updateBytes_entry ++ return 0; +} + -+void TemplateTable::wide_iload() { -+ transition(vtos, itos); -+ locals_index_wide(x11); -+ __ lw(x10, iaddress(x11, t0, _masm)); -+} ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // See more discussion in stackOverflow.hpp. 
+ -+void TemplateTable::wide_lload() -+{ -+ transition(vtos, ltos); -+ __ lhu(x11, at_bcp(2)); -+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = shadow_zone_size / page_size; + -+void TemplateTable::wide_fload() -+{ -+ transition(vtos, ftos); -+ locals_index_wide(x11); -+ __ flw(f10, faddress(x11, t0, _masm)); -+} ++#ifdef ASSERT ++ Label L_good_limit; ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); ++ __ bnez(t0, L_good_limit); ++ __ stop("shadow zone safe limit is not initialized"); ++ __ bind(L_good_limit); ++ ++ Label L_good_watermark; ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ __ bnez(t0, L_good_watermark); ++ __ stop("shadow zone growth watermark is not initialized"); ++ __ bind(L_good_watermark); ++#endif + -+void TemplateTable::wide_dload() -+{ -+ transition(vtos, dtos); -+ __ lhu(x11, at_bcp(2)); -+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ Label L_done; + -+void TemplateTable::wide_aload() -+{ -+ transition(vtos, atos); -+ locals_index_wide(x11); -+ __ ld(x10, aaddress(x11, t0, _masm)); -+} ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ __ bgtu(sp, t0, L_done); + -+void TemplateTable::index_check(Register array, Register index) -+{ -+ // destroys x11, t0 -+ // check array -+ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); -+ // sign extend index for use by indexed load -+ // check index -+ const Register length = t0; -+ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); -+ if (index != x11) { -+ assert(x11 != array, "different registers"); -+ __ mv(x11, index); ++ for (int p = 1; p <= n_shadow_pages; p++) { ++ __ bang_stack_with_offset(p * page_size); + } -+ Label ok; -+ __ addw(index, index, zr); -+ __ bltu(index, length, ok); -+ __ mv(x13, array); -+ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); -+ __ jr(t0); -+ __ bind(ok); -+} -+ -+void TemplateTable::iaload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+} + -+void TemplateTable::laload() -+{ -+ transition(itos, ltos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ // Record the new watermark, but only if the update is above the safe limit. ++ // Otherwise, the next time around the check above would pass the safe limit. 
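A compact C++ sketch of the shadow-zone banging above: touch one word in each shadow page below sp unless sp is already above the recorded growth watermark, then advance the watermark, but never at or below the safe limit, so later calls to the same depth skip the banging. Names and the struct are illustrative; the real code keeps the watermark and safe limit per JavaThread, and the banged pages are guard pages on a mapped thread stack.

#include <cstdint>
#include <cstddef>

struct ShadowZoneState {              // per-thread state, illustrative only
  uintptr_t growth_watermark;         // lowest sp for which pages were already banged
  uintptr_t safe_limit;               // never record a watermark at or below this
};

inline void touch_page(uintptr_t addr) {
  *reinterpret_cast<volatile char*>(addr) = 0;   // models bang_stack_with_offset
}

void bang_shadow_pages(uintptr_t sp, ShadowZoneState* zone,
                       size_t page_size, int n_shadow_pages) {
  if (sp > zone->growth_watermark) {  // already banged at least this deep
    return;
  }
  for (int p = 1; p <= n_shadow_pages; p++) {
    touch_page(sp - p * page_size);
  }
  if (sp > zone->safe_limit) {        // only move the watermark while above the safe limit
    zone->growth_watermark = sp;
  }
}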
++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); ++ __ bleu(sp, t0, L_done); ++ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); + -+void TemplateTable::faload() -+{ -+ transition(itos, ftos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ bind(L_done); +} + -+void TemplateTable::daload() -+{ -+ transition(itos, dtos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + -+void TemplateTable::aaload() -+{ -+ transition(itos, atos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop); -+ do_oop_load(_masm, -+ Address(t0), -+ x10, -+ IS_ARRAY); -+} ++ // x11: Method* ++ // x30: sender sp + -+void TemplateTable::baload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); -+ __ shadd(t0, x11, x10, t0, 0); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ address entry_point = __ pc(); + -+void TemplateTable::caload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ const Address constMethod (xmethod, Method::const_offset()); ++ const Address access_flags (xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x12, ConstMethod:: ++ size_of_parameters_offset()); + -+// iload followed by caload frequent pair -+void TemplateTable::fast_icaload() -+{ -+ transition(vtos, itos); -+ // load index out of locals -+ locals_index(x12); -+ __ lw(x11, iaddress(x12, x11, _masm)); -+ __ pop_ptr(x10); ++ // get parameter size (always needed) ++ __ ld(x12, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); + -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11, kills t0 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ // Native calls don't need the stack size check since they have no ++ // expression stack and the 
arguments are already on the stack and ++ // we only add a handful of words to the stack. + -+void TemplateTable::saload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11, kills t0 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ // xmethod: Method* ++ // x12: size of parameters ++ // x30: sender sp + -+void TemplateTable::iload(int n) -+{ -+ transition(vtos, itos); -+ __ lw(x10, iaddress(n)); -+} ++ // for natives the size of locals is zero + -+void TemplateTable::lload(int n) -+{ -+ transition(vtos, ltos); -+ __ ld(x10, laddress(n)); -+} ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, xlocals, 3); ++ __ addi(xlocals, xlocals, -wordSize); + -+void TemplateTable::fload(int n) -+{ -+ transition(vtos, ftos); -+ __ flw(f10, faddress(n)); -+} ++ // Pull SP back to minimum size: this avoids holes in the stack ++ __ andi(sp, esp, -16); + -+void TemplateTable::dload(int n) -+{ -+ transition(vtos, dtos); -+ __ fld(f10, daddress(n)); -+} ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); + -+void TemplateTable::aload(int n) -+{ -+ transition(vtos, atos); -+ __ ld(x10, iaddress(n)); -+} ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif + -+void TemplateTable::aload_0() { -+ aload_0_internal(); -+} ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. + -+void TemplateTable::nofast_aload_0() { -+ aload_0_internal(may_not_rewrite); -+} ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); + -+void TemplateTable::aload_0_internal(RewriteControl rc) { -+ // According to bytecode histograms, the pairs: -+ // -+ // _aload_0, _fast_igetfield -+ // _aload_0, _fast_agetfield -+ // _aload_0, _fast_fgetfield -+ // -+ // occur frequently. If RewriteFrequentPairs is set, the (slow) -+ // _aload_0 bytecode checks if the next bytecode is either -+ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then -+ // rewrites the current bytecode into a pair bytecode; otherwise it -+ // rewrites the current bytecode into _fast_aload_0 that doesn't do -+ // the pair check anymore. -+ // -+ // Note: If the next bytecode is _getfield, the rewrite must be -+ // delayed, otherwise we may miss an opportunity for a pair. 
-+ // -+ // Also rewrite frequent pairs -+ // aload_0, aload_1 -+ // aload_0, iload_1 -+ // These bytecodes with a small amount of code are most profitable -+ // to rewrite -+ if (RewriteFrequentPairs && rc == may_rewrite) { -+ Label rewrite, done; -+ const Register bc = x14; ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } + -+ // get next bytecode -+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ Label continue_after_compile; ++ __ bind(continue_after_compile); + -+ // if _getfield then wait with rewrite -+ __ mv(t1, Bytecodes::Bytecodes::_getfield); -+ __ beq(x11, t1, done); ++ bang_stack_shadow_pages(true); + -+ // if _igetfield then rewrite to _fast_iaccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_igetfield); -+ __ mv(bc, Bytecodes::_fast_iaccess_0); -+ __ beq(x11, t1, rewrite); ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); + -+ // if _agetfield then rewrite to _fast_aaccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_agetfield); -+ __ mv(bc, Bytecodes::_fast_aaccess_0); -+ __ beq(x11, t1, rewrite); ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif ++ } + -+ // if _fgetfield then rewrite to _fast_faccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_fgetfield); -+ __ mv(bc, Bytecodes::_fast_faccess_0); -+ __ beq(x11, t1, rewrite); ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif + -+ // else rewrite to _fast_aload0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); ++ // jvmti support ++ __ notify_method_entry(); + -+ // rewrite -+ // bc: new bytecode -+ __ bind(rewrite); -+ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); ++ // work registers ++ const Register t = x18; ++ const Register result_handler = x19; + -+ __ bind(done); -+ } ++ // allocate space for parameters ++ __ ld(t, Address(xmethod, Method::const_offset())); ++ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + -+ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
-+ aload(0); -+} ++ __ slli(t, t, Interpreter::logStackElementSize); ++ __ sub(x30, esp, t); ++ __ andi(sp, x30, -16); ++ __ mv(esp, x30); + -+void TemplateTable::istore() -+{ -+ transition(itos, vtos); -+ locals_index(x11); -+ __ sw(x10, iaddress(x11, t0, _masm)); -+} ++ // get signature handler ++ { ++ Label L; ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bnez(t, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bind(L); ++ } + -+void TemplateTable::lstore() -+{ -+ transition(ltos, vtos); -+ locals_index(x11); -+ __ sd(x10, laddress(x11, t0, _masm)); -+} ++ // call signature handler ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, ++ "adjust this code"); + -+void TemplateTable::fstore() { -+ transition(ftos, vtos); -+ locals_index(x11); -+ __ fsw(f10, iaddress(x11, t0, _masm)); -+} ++ // The generated handlers do not touch xmethod (the method). ++ // However, large signatures cannot be cached and are generated ++ // each time here. The slow-path generator can do a GC on return, ++ // so we must reload it after the call. ++ __ jalr(t); ++ __ get_method(xmethod); // slow path can do a GC, reload xmethod + -+void TemplateTable::dstore() { -+ transition(dtos, vtos); -+ locals_index(x11); -+ __ fsd(f10, daddress(x11, t0, _masm)); -+} + -+void TemplateTable::astore() -+{ -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ locals_index(x11); -+ __ sd(x10, aaddress(x11, t0, _masm)); -+} ++ // result handler is in x10 ++ // set result handler ++ __ mv(result_handler, x10); ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_STATIC); ++ __ beqz(t0, L); ++ // get mirror ++ __ load_mirror(t, xmethod); ++ // copy mirror into activation frame ++ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // pass handle to mirror ++ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ bind(L); ++ } + -+void TemplateTable::wide_istore() { -+ transition(vtos, vtos); -+ __ pop_i(); -+ locals_index_wide(x11); -+ __ sw(x10, iaddress(x11, t0, _masm)); -+} ++ // get native function entry point in x28 ++ { ++ Label L; ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ mv(t1, unsatisfied); ++ __ ld(t1, t1); ++ __ bne(x28, t1, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ get_method(xmethod); ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ __ bind(L); ++ } + -+void TemplateTable::wide_lstore() { -+ transition(vtos, vtos); -+ __ pop_l(); -+ locals_index_wide(x11); -+ __ sd(x10, laddress(x11, t0, _masm)); -+} ++ // pass JNIEnv ++ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); + -+void TemplateTable::wide_fstore() { -+ transition(vtos, vtos); -+ __ pop_f(); -+ locals_index_wide(x11); -+ __ fsw(f10, faddress(x11, t0, _masm)); -+} ++ // It is enough that the pc() points into the right code ++ // segment. It does not have to be the correct return pc. 
++ Label native_return; ++ __ set_last_Java_frame(esp, fp, native_return, x30); + -+void TemplateTable::wide_dstore() { -+ transition(vtos, vtos); -+ __ pop_d(); -+ locals_index_wide(x11); -+ __ fsd(f10, daddress(x11, t0, _masm)); -+} ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); ++ __ addi(t0, zr, (u1)_thread_in_Java); ++ __ beq(t, t0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif + -+void TemplateTable::wide_astore() { -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ locals_index_wide(x11); -+ __ sd(x10, aaddress(x11, t0, _masm)); -+} ++ // Change state to native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); + -+void TemplateTable::iastore() { -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); -+} ++ // Call the native method. ++ __ jalr(x28); ++ __ bind(native_return); ++ __ get_method(xmethod); ++ // result potentially in x10 or f10 + -+void TemplateTable::lastore() { -+ transition(ltos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); -+} ++ // make room for the pushes we're about to do ++ __ sub(t0, esp, 4 * wordSize); ++ __ andi(sp, t0, -16); + -+void TemplateTable::fastore() { -+ transition(ftos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // f10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); -+} ++ // NOTE: The order of these pushes is known to frame::interpreter_frame_result ++ // in order to extract the result of a method call. If the order of these ++ // pushes change or anything else is added to the stack then the code in ++ // interpreter_frame_result must also change. 
++ __ push(dtos); ++ __ push(ltos); + -+void TemplateTable::dastore() { -+ transition(dtos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // f10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); -+} ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + -+void TemplateTable::aastore() { -+ Label is_null, ok_is_subtype, done; -+ transition(vtos, vtos); -+ // stack: ..., array, index, value -+ __ ld(x10, at_tos()); // value -+ __ ld(x12, at_tos_p1()); // index -+ __ ld(x13, at_tos_p2()); // array ++ __ mv(t0, _thread_in_native_trans); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ index_check(x13, x12); // kills x11 -+ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + -+ Address element_address(x14, 0); ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label L, Continue; + -+ // do array store check - check for NULL value first -+ __ beqz(x10, is_null); ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. ++ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ beqz(t1, Continue); ++ __ bind(L); + -+ // Move subklass into x11 -+ __ load_klass(x11, x10); -+ // Move superklass into x10 -+ __ load_klass(x10, x13); -+ __ ld(x10, Address(x10, -+ ObjArrayKlass::element_klass_offset())); -+ // Compress array + index * oopSize + 12 into a single register. Frees x12. ++ // Don't use call_VM as it will see a possible pending exception ++ // and forward it and never return here preventing us from ++ // clearing _last_native_pc down below. So we do a runtime call by ++ // hand. ++ // ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); ++ __ jalr(t1); ++ __ get_method(xmethod); ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } + -+ // Generate subtype check. Blows x12, x15 -+ // Superklass in x10. Subklass in x11. 
-+ __ gen_subtype_check(x11, ok_is_subtype); //todo ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + -+ // Come here on failure -+ // object is at TOS -+ __ j(Interpreter::_throw_ArrayStoreException_entry); ++ __ mv(t0, _thread_in_Java); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // Come here on success -+ __ bind(ok_is_subtype); ++ // reset_last_Java_frame ++ __ reset_last_Java_frame(true); + -+ // Get the value we will store -+ __ ld(x10, at_tos()); -+ // Now store using the appropriate barrier -+ do_oop_store(_masm, element_address, x10, IS_ARRAY); -+ __ j(done); ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++ // reset handle block ++ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); + -+ // Store a NULL -+ do_oop_store(_masm, element_address, noreg, IS_ARRAY); ++ // If result is an oop unbox and store it in frame where gc will see it ++ // and result handler will pick it up + -+ // Pop stack arguments -+ __ bind(done); -+ __ add(esp, esp, 3 * Interpreter::stackElementSize); ++ { ++ Label no_oop; ++ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); ++ __ bne(t, result_handler, no_oop); ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ __ pop(ltos); ++ __ resolve_jobject(x10, xthread, t); ++ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } + -+} ++ { ++ Label no_reguard; ++ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); ++ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ bne(t0, t1, no_reguard); + -+void TemplateTable::bastore() -+{ -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 ++ __ pusha(); // only save smashed registers ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ __ jalr(t1); ++ __ popa(); // only restore smashed registers ++ __ bind(no_reguard); ++ } + -+ // Need to check whether array is boolean or byte -+ // since both types share the bastore bytecode. -+ __ load_klass(x12, x13); -+ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); -+ Label L_skip; -+ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); -+ __ beqz(t0, L_skip); -+ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 -+ __ bind(L_skip); ++ // The method register is junk from after the thread_in_native transition ++ // until here. Also can't call_VM until the bcp has been ++ // restored. Need bcp for throwing exception below so get it now. 
++ __ get_method(xmethod); + -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> ++ // xbcp == code_base() ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // Note: At some point we may want to unify this with the code ++ // used in call_VM_base(); i.e., we should use the ++ // StubRoutines::forward_exception code. For now this doesn't work ++ // here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } + -+ __ add(x11, x13, x11); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, noreg); -+} ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); ++ __ beqz(t0, L); ++ // the code below should be shared with interpreter macro ++ // assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object ++ // has not been unlocked by an explicit monitorexit bytecode. + -+void TemplateTable::castore() -+{ -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x13, t0, 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); -+} ++ // monitor expect in c_rarg1 for slow unlock path ++ __ la(c_rarg1, Address(fp, // address of first monitor ++ (intptr_t)(frame::interpreter_frame_initial_sp_offset * ++ wordSize - sizeof(BasicObjectLock)))); + -+void TemplateTable::sastore() -+{ -+ castore(); -+} ++ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t, unlock); + -+void TemplateTable::istore(int n) -+{ -+ transition(itos, vtos); -+ __ sd(x10, iaddress(n)); -+} ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); + -+void TemplateTable::lstore(int n) -+{ -+ transition(ltos, vtos); -+ __ sd(x10, laddress(n)); -+} ++ __ bind(unlock); ++ __ unlock_object(c_rarg1); ++ } ++ __ bind(L); ++ } + -+void TemplateTable::fstore(int n) -+{ -+ transition(ftos, vtos); -+ __ fsw(f10, faddress(n)); -+} ++ // jvmti support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + -+void TemplateTable::dstore(int n) -+{ -+ transition(dtos, vtos); -+ __ fsd(f10, daddress(n)); -+} ++ __ pop(ltos); ++ __ pop(dtos); + -+void TemplateTable::astore(int n) -+{ -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ __ sd(x10, iaddress(n)); -+} ++ __ jalr(result_handler); + -+void TemplateTable::pop() -+{ -+ transition(vtos, vtos); -+ __ addi(esp, esp, Interpreter::stackElementSize); -+} ++ // remove activation ++ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp ++ // remove frame anchor ++ __ leave(); + -+void TemplateTable::pop2() -+{ -+ transition(vtos, vtos); -+ __ addi(esp, esp, 2 * Interpreter::stackElementSize); -+} ++ // restore sender sp ++ __ mv(sp, esp); + -+void TemplateTable::dup() -+{ -+ transition(vtos, vtos); -+ __ ld(x10, Address(esp, 0)); -+ __ push_reg(x10); -+ // stack: ..., a, a -+} ++ __ ret(); + -+void TemplateTable::dup_x1() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x10, at_tos()); // load b -+ __ ld(x12, at_tos_p1()); // load a -+ __ sd(x10, at_tos_p1()); // store b -+ __ sd(x12, at_tos()); // store a -+ __ push_reg(x10); // push b -+ // stack: ..., b, a, b -+} ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } + -+void TemplateTable::dup_x2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c -+ __ ld(x10, at_tos()); // load c -+ __ ld(x12, at_tos_p2()); // load a -+ __ sd(x10, at_tos_p2()); // store c in a -+ __ push_reg(x10); // push c -+ // stack: ..., c, b, c, c -+ __ ld(x10, at_tos_p2()); // load b -+ __ sd(x12, at_tos_p2()); // store a in b -+ // stack: ..., c, a, c, c -+ __ sd(x10, at_tos_p1()); // store b in c -+ // stack: ..., c, a, b, c ++ return entry_point; +} + -+void TemplateTable::dup2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x10, at_tos_p1()); // load a -+ __ push_reg(x10); // push a -+ __ ld(x10, at_tos_p1()); // load b -+ __ push_reg(x10); // push b -+ // stack: ..., a, b, a, b -+} ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + -+void TemplateTable::dup2_x1() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c -+ __ ld(x12, at_tos()); // load c -+ __ ld(x10, at_tos_p1()); // load b -+ __ push_reg(x10); // push b -+ __ push_reg(x12); // push c -+ // stack: ..., a, b, c, b, c -+ __ sd(x12, at_tos_p3()); // store c in b -+ // stack: ..., a, c, c, b, c -+ __ ld(x12, at_tos_p4()); // load a -+ __ sd(x12, at_tos_p2()); // store a in 2nd c -+ // stack: ..., a, c, a, b, c -+ __ sd(x10, at_tos_p4()); // store b in a -+ // stack: ..., b, c, a, b, c -+} ++ // determine code generation flags ++ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + -+void TemplateTable::dup2_x2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c, d -+ __ ld(x12, at_tos()); // load d -+ __ ld(x10, at_tos_p1()); // load c -+ __ push_reg(x10); // push c -+ __ push_reg(x12); // push d -+ // stack: ..., a, b, c, d, c, d -+ __ ld(x10, at_tos_p4()); // load b -+ __ sd(x10, at_tos_p2()); // store b in d -+ __ sd(x12, at_tos_p4()); // store d in b -+ // stack: ..., a, d, c, b, c, d -+ __ ld(x12, at_tos_p5()); // load a -+ __ ld(x10, at_tos_p3()); // load c -+ __ sd(x12, at_tos_p3()); // store a in c -+ __ sd(x10, at_tos_p5()); // store c in 
a -+ // stack: ..., c, d, a, b, c, d -+} ++ // t0: sender sp ++ address entry_point = __ pc(); + -+void TemplateTable::swap() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x12, at_tos_p1()); // load a -+ __ ld(x10, at_tos()); // load b -+ __ sd(x12, at_tos()); // store a in b -+ __ sd(x10, at_tos_p1()); // store b in a -+ // stack: ..., b, a -+} ++ const Address constMethod(xmethod, Method::const_offset()); ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x13, ++ ConstMethod::size_of_parameters_offset()); ++ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); + -+void TemplateTable::iop2(Operation op) -+{ -+ transition(itos, itos); -+ // x10 <== x11 op x10 -+ __ pop_i(x11); -+ switch (op) { -+ case add : __ addw(x10, x11, x10); break; -+ case sub : __ subw(x10, x11, x10); break; -+ case mul : __ mulw(x10, x11, x10); break; -+ case _and : __ andrw(x10, x11, x10); break; -+ case _or : __ orrw(x10, x11, x10); break; -+ case _xor : __ xorrw(x10, x11, x10); break; -+ case shl : __ sllw(x10, x11, x10); break; -+ case shr : __ sraw(x10, x11, x10); break; -+ case ushr : __ srlw(x10, x11, x10); break; -+ default : ShouldNotReachHere(); -+ } -+} ++ // get parameter size (always needed) ++ // need to load the const method first ++ __ ld(x13, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); + -+void TemplateTable::lop2(Operation op) -+{ -+ transition(ltos, ltos); -+ // x10 <== x11 op x10 -+ __ pop_l(x11); -+ switch (op) { -+ case add : __ add(x10, x11, x10); break; -+ case sub : __ sub(x10, x11, x10); break; -+ case mul : __ mul(x10, x11, x10); break; -+ case _and : __ andr(x10, x11, x10); break; -+ case _or : __ orr(x10, x11, x10); break; -+ case _xor : __ xorr(x10, x11, x10); break; -+ default : ShouldNotReachHere(); ++ // x12: size of parameters ++ ++ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words ++ __ sub(x13, x13, x12); // x13 = no. of additional locals ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ generate_stack_overflow_check(); ++ ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, t1, 3); ++ __ add(xlocals, xlocals, -wordSize); ++ ++ // Make room for additional locals ++ __ slli(t1, x13, 3); ++ __ sub(t0, esp, t1); ++ ++ // Padding between locals and fixed part of activation frame to ensure ++ // SP is always 16-byte aligned. 
++ __ andi(sp, t0, -16); ++ ++ // x13 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ blez(x13, exit); // do nothing if x13 <= 0 ++ __ bind(loop); ++ __ sd(zr, Address(t0)); ++ __ add(t0, t0, wordSize); ++ __ add(x13, x13, -1); // until everything initialized ++ __ bnez(x13, loop); ++ __ bind(exit); + } -+} + -+void TemplateTable::idiv() -+{ -+ transition(itos, itos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_i(x11); -+ // x10 <== x11 idiv x10 -+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); -+} ++ // And the base dispatch table ++ __ get_dispatch(); + -+void TemplateTable::irem() -+{ -+ transition(itos, itos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_i(x11); -+ // x10 <== x11 irem x10 -+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); -+} ++ // initialize fixed part of activation frame ++ generate_fixed_frame(false); + -+void TemplateTable::lmul() -+{ -+ transition(ltos, ltos); -+ __ pop_l(x11); -+ __ mul(x10, x10, x11); -+} ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif + -+void TemplateTable::ldiv() -+{ -+ transition(ltos, ltos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_l(x11); -+ // x10 <== x11 ldiv x10 -+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); -+} ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. 
+ -+void TemplateTable::lrem() -+{ -+ transition(ltos, ltos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_l(x11); -+ // x10 <== x11 lrem x10 -+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); -+} ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); + -+void TemplateTable::lshl() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ sll(x10, x11, x10); -+} ++ Label no_mdp; ++ const Register mdp = x13; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ beqz(mdp, no_mdp); ++ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); ++ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers ++ __ bind(no_mdp); + -+void TemplateTable::lshr() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ sra(x10, x11, x10); -+} ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } + -+void TemplateTable::lushr() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ srl(x10, x11, x10); -+} ++ Label continue_after_compile; ++ __ bind(continue_after_compile); + -+void TemplateTable::fop2(Operation op) -+{ -+ transition(ftos, ftos); -+ switch (op) { -+ case add: -+ __ pop_f(f11); -+ __ fadd_s(f10, f11, f10); -+ break; -+ case sub: -+ __ pop_f(f11); -+ __ fsub_s(f10, f11, f10); -+ break; -+ case mul: -+ __ pop_f(f11); -+ __ fmul_s(f10, f11, f10); -+ break; -+ case div: -+ __ pop_f(f11); -+ __ fdiv_s(f10, f11, f10); -+ break; -+ case rem: -+ __ fmv_s(f11, f10); -+ __ pop_f(f10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. 
++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif + } -+} + -+void TemplateTable::dop2(Operation op) -+{ -+ transition(dtos, dtos); -+ switch (op) { -+ case add: -+ __ pop_d(f11); -+ __ fadd_d(f10, f11, f10); -+ break; -+ case sub: -+ __ pop_d(f11); -+ __ fsub_d(f10, f11, f10); -+ break; -+ case mul: -+ __ pop_d(f11); -+ __ fmul_d(f10, f11, f10); -+ break; -+ case div: -+ __ pop_d(f11); -+ __ fdiv_d(f10, f11, f10); -+ break; -+ case rem: -+ __ fmv_d(f11, f10); -+ __ pop_d(f10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif ++ ++ // jvmti support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); + } -+} + -+void TemplateTable::ineg() -+{ -+ transition(itos, itos); -+ __ negw(x10, x10); ++ return entry_point; +} + -+void TemplateTable::lneg() -+{ -+ transition(ltos, ltos); -+ __ neg(x10, x10); -+} ++//----------------------------------------------------------------------------- ++// Exceptions + -+void TemplateTable::fneg() -+{ -+ transition(ftos, ftos); -+ __ fneg_s(f10, f10); -+} ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // x10: exception ++ // x13: return address/pc that threw exception ++ __ restore_bcp(); // xbcp points to call/send ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ reinit_heapbase(); // restore xheapbase as heapbase. ++ __ get_dispatch(); + -+void TemplateTable::dneg() -+{ -+ transition(dtos, dtos); -+ __ fneg_d(f10, f10); -+} ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // If we came here via a NullPointerException on the receiver of a ++ // method, xthread may be corrupt. 
++ __ get_method(xmethod); ++ // expression stack is undefined here ++ // x10: exception ++ // xbcp: exception bcp ++ __ verify_oop(x10); ++ __ mv(c_rarg1, x10); + -+void TemplateTable::iinc() -+{ -+ transition(vtos, vtos); -+ __ load_signed_byte(x11, at_bcp(2)); // get constant -+ locals_index(x12); -+ __ ld(x10, iaddress(x12, x10, _masm)); -+ __ addw(x10, x10, x11); -+ __ sd(x10, iaddress(x12, t0, _masm)); -+} ++ // expression stack must be empty before entering the VM in case of ++ // an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ call_VM(x13, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::exception_handler_for_exception), ++ c_rarg1); + -+void TemplateTable::wide_iinc() -+{ -+ transition(vtos, vtos); -+ __ lwu(x11, at_bcp(2)); // get constant and index -+ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend -+ __ zero_extend(x12, x11, 16); -+ __ neg(x12, x12); -+ __ slli(x11, x11, 32); -+ __ srai(x11, x11, 48); -+ __ ld(x10, iaddress(x12, t0, _masm)); -+ __ addw(x10, x10, x11); -+ __ sd(x10, iaddress(x12, t0, _masm)); -+} ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+void TemplateTable::convert() -+{ -+ // Checking -+#ifdef ASSERT -+ { -+ TosState tos_in = ilgl; -+ TosState tos_out = ilgl; -+ switch (bytecode()) { -+ case Bytecodes::_i2l: // fall through -+ case Bytecodes::_i2f: // fall through -+ case Bytecodes::_i2d: // fall through -+ case Bytecodes::_i2b: // fall through -+ case Bytecodes::_i2c: // fall through -+ case Bytecodes::_i2s: tos_in = itos; break; -+ case Bytecodes::_l2i: // fall through -+ case Bytecodes::_l2f: // fall through -+ case Bytecodes::_l2d: tos_in = ltos; break; -+ case Bytecodes::_f2i: // fall through -+ case Bytecodes::_f2l: // fall through -+ case Bytecodes::_f2d: tos_in = ftos; break; -+ case Bytecodes::_d2i: // fall through -+ case Bytecodes::_d2l: // fall through -+ case Bytecodes::_d2f: tos_in = dtos; break; -+ default : ShouldNotReachHere(); -+ } -+ switch (bytecode()) { -+ case Bytecodes::_l2i: // fall through -+ case Bytecodes::_f2i: // fall through -+ case Bytecodes::_d2i: // fall through -+ case Bytecodes::_i2b: // fall through -+ case Bytecodes::_i2c: // fall through -+ case Bytecodes::_i2s: tos_out = itos; break; -+ case Bytecodes::_i2l: // fall through -+ case Bytecodes::_f2l: // fall through -+ case Bytecodes::_d2l: tos_out = ltos; break; -+ case Bytecodes::_i2f: // fall through -+ case Bytecodes::_l2f: // fall through -+ case Bytecodes::_d2f: tos_out = ftos; break; -+ case Bytecodes::_i2d: // fall through -+ case Bytecodes::_l2d: // fall through -+ case Bytecodes::_f2d: tos_out = dtos; break; -+ default : ShouldNotReachHere(); -+ } -+ transition(tos_in, tos_out); -+ } -+#endif // ASSERT ++ // x10: exception handler entry point ++ // x13: preserved exception oop ++ // xbcp: bcp for exception handler ++ __ push_ptr(x13); // push exception which is now the only value on the stack ++ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) 
+ -+ // Conversion -+ switch (bytecode()) { -+ case Bytecodes::_i2l: -+ __ sign_extend(x10, x10, 32); -+ break; -+ case Bytecodes::_i2f: -+ __ fcvt_s_w(f10, x10); -+ break; -+ case Bytecodes::_i2d: -+ __ fcvt_d_w(f10, x10); -+ break; -+ case Bytecodes::_i2b: -+ __ sign_extend(x10, x10, 8); -+ break; -+ case Bytecodes::_i2c: -+ __ zero_extend(x10, x10, 16); -+ break; -+ case Bytecodes::_i2s: -+ __ sign_extend(x10, x10, 16); -+ break; -+ case Bytecodes::_l2i: -+ __ addw(x10, x10, zr); -+ break; -+ case Bytecodes::_l2f: -+ __ fcvt_s_l(f10, x10); -+ break; -+ case Bytecodes::_l2d: -+ __ fcvt_d_l(f10, x10); -+ break; -+ case Bytecodes::_f2i: -+ __ fcvt_w_s_safe(x10, f10); -+ break; -+ case Bytecodes::_f2l: -+ __ fcvt_l_s_safe(x10, f10); -+ break; -+ case Bytecodes::_f2d: -+ __ fcvt_d_s(f10, f10); -+ break; -+ case Bytecodes::_d2i: -+ __ fcvt_w_d_safe(x10, f10); -+ break; -+ case Bytecodes::_d2l: -+ __ fcvt_l_d_safe(x10, f10); -+ break; -+ case Bytecodes::_d2f: -+ __ fcvt_s_d(f10, f10); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++ // If the exception is not handled in the current frame the frame is ++ // removed and the exception is rethrown (i.e. exception ++ // continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction ++ // which caused the exception and the expression stack is ++ // empty. Thus, for any VM calls at this point, GC will find a legal ++ // oop map (with empty expression stack). + -+void TemplateTable::lcmp() -+{ -+ transition(ltos, itos); -+ __ pop_l(x11); -+ __ cmp_l2i(t0, x11, x10); -+ __ mv(x10, t0); -+} ++ // ++ // JVMTI PopFrame support ++ // + -+void TemplateTable::float_cmp(bool is_float, int unordered_result) -+{ -+ // For instruction feq, flt and fle, the result is 0 if either operand is NaN -+ if (is_float) { -+ __ pop_f(f11); -+ // if unordered_result < 0: -+ // we want -1 for unordered or less than, 0 for equal and 1 for -+ // greater than. -+ // else: -+ // we want -1 for less than, 0 for equal and 1 for unordered or -+ // greater than. -+ // f11 primary, f10 secondary -+ __ float_compare(x10, f11, f10, unordered_result); -+ } else { -+ __ pop_d(f11); -+ // if unordered_result < 0: -+ // we want -1 for unordered or less than, 0 for equal and 1 for -+ // greater than. -+ // else: -+ // we want -1 for less than, 0 for equal and 1 for unordered or -+ // greater than. -+ // f11 primary, f10 secondary -+ __ double_compare(x10, f11, f10, unordered_result); -+ } -+} ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition ++ // indicating that we are currently handling popframe, so that ++ // call_VMs that may happen later do not trigger new popframe ++ // handling cycles. ++ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ __ ori(x13, x13, JavaThread::popframe_processing_bit); ++ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); + -+void TemplateTable::branch(bool is_jsr, bool is_wide) -+{ -+ // We might be moving to a safepoint. The thread which calls -+ // Interpreter::notice_safepoints() will effectively flush its cache -+ // when it makes a system call, but we need to do something to -+ // ensure that we see the changed dispatch table. -+ __ membar(MacroAssembler::LoadLoad); ++ { ++ // Check to see whether we are returning to a deoptimized frame. 
++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); ++ __ bnez(x10, caller_not_deoptimized); + -+ __ profile_taken_branch(x10, x11); -+ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset(); -+ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset(); ++ // Compute size of arguments for saving when returning to ++ // deoptimized caller ++ __ get_method(x10); ++ __ ld(x10, Address(x10, Method::const_offset())); ++ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: ++ size_of_parameters_offset()))); ++ __ slli(x10, x10, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub(xlocals, xlocals, x10); ++ __ add(xlocals, xlocals, wordSize); ++ // Save these arguments ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ Deoptimization:: ++ popframe_preserve_args), ++ xthread, x10, xlocals); + -+ // load branch displacement -+ if (!is_wide) { -+ __ lhu(x12, at_bcp(1)); -+ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend -+ } else { -+ __ lwu(x12, at_bcp(1)); -+ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend -+ } ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); + -+ // Handle all the JSR stuff here, then exit. -+ // It's much shorter and cleaner than intermingling with the non-JSR -+ // normal-branch stuff occurring below. ++ // Inform deoptimization that it is responsible for restoring ++ // these arguments ++ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); + -+ if (is_jsr) { -+ // compute return address as bci -+ __ ld(t1, Address(xmethod, Method::const_offset())); -+ __ add(t1, t1, -+ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 
5 : 3)); -+ __ sub(x11, xbcp, t1); -+ __ push_i(x11); -+ // Adjust the bcp by the 16-bit displacement in x12 -+ __ add(xbcp, xbcp, x12); -+ __ load_unsigned_byte(t0, Address(xbcp, 0)); -+ // load the next target bytecode into t0, it is the argument of dispatch_only -+ __ dispatch_only(vtos, /*generate_poll*/true); -+ return; ++ // Continue in deoptimization handler ++ __ ret(); ++ ++ __ bind(caller_not_deoptimized); + } + -+ // Normal (non-jsr) branch handling ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); + -+ // Adjust the bcp by the displacement in x12 -+ __ add(xbcp, xbcp, x12); ++ // Restore the last_sp and null it out ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + -+ assert(UseLoopCounter || !UseOnStackReplacement, -+ "on-stack-replacement requires loop counters"); -+ Label backedge_counter_overflow; -+ Label profile_method; -+ Label dispatch; -+ if (UseLoopCounter) { -+ // increment backedge counter for backward branches -+ // x10: MDO -+ // x11: MDO bumped taken-count -+ // x12: target offset -+ __ bgtz(x12, dispatch); // count only if backward branch ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); + -+ // check if MethodCounters exists -+ Label has_counters; -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ __ bnez(t0, has_counters); -+ __ push_reg(x10); -+ __ push_reg(x11); -+ __ push_reg(x12); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), xmethod); -+ __ pop_reg(x12); -+ __ pop_reg(x11); -+ __ pop_reg(x10); -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory -+ __ bind(has_counters); ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } + -+ if (TieredCompilation) { -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, -+ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); -+ } else { // not TieredCompilation -+ // increment counter -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter -+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter ++ // Clear the popframe condition flag ++ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); ++ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + -+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter -+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits -+ __ addw(x10, x10, t0); // add both counters ++#if INCLUDE_JVMTI ++ { ++ Label L_done; + -+ if (ProfileInterpreter) { -+ // Test to see if we should create a method data oop -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t0, dispatch); ++ __ lbu(t0, Address(xbcp, 0)); ++ __ li(t1, Bytecodes::_invokestatic); ++ __ bne(t1, t0, L_done); + -+ // if no method data exists, go to profile method -+ __ test_method_data_pointer(x10, profile_method); ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. + -+ if (UseOnStackReplacement) { -+ // check for overflow against x11 which is the MDO taken count -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ __ ld(c_rarg0, Address(xlocals, 0)); ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); + -+ // When ProfileInterpreter is on, the backedge_count comes -+ // from the MethodData*, which value does not get reset on -+ // the call to frequency_counter_overflow(). To avoid -+ // excessive calls to the overflow routine while the method is -+ // being compiled, add a second test to make sure the overflow -+ // function is called only once every overflow_frequency. 
-+ const int overflow_frequency = 1024; -+ __ andi(x11, x11, overflow_frequency - 1); -+ __ beqz(x11, backedge_counter_overflow); ++ __ beqz(x10, L_done); + -+ } -+ } else { -+ if (UseOnStackReplacement) { -+ // check for overflow against x10, which is the sum of the -+ // counters -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual -+ } -+ } -+ } -+ __ bind(dispatch); ++ __ sd(x10, Address(esp, 0)); ++ __ bind(L_done); + } ++#endif // INCLUDE_JVMTI + -+ // Pre-load the next target bytecode into t0 -+ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slliw(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ // continue with the bytecode @ target -+ // t0: target bytecode -+ // xbcp: target bcp -+ __ dispatch_only(vtos, /*generate_poll*/true); ++ __ dispatch_next(vtos); ++ // end of PopFrame support + -+ if (UseLoopCounter) { -+ if (ProfileInterpreter && !TieredCompilation) { -+ // Out-of-line code to allocate method data oop. -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ __ set_method_data_pointer_for_bcp(); -+ __ j(dispatch); -+ } ++ Interpreter::_remove_activation_entry = __ pc(); + -+ if (UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ // preserve exception over this code sequence ++ __ pop_ptr(x10); ++ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, false, true, false); ++ // restore exception ++ __ get_vm_result(x10, xthread); + -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects the ++ // following registers set up: ++ // ++ // x10: exception ++ // ra: return address/pc that threw exception ++ // sp: expression stack of caller ++ // fp: fp of caller ++ // FIXME: There's no point saving ra here because VM calls don't trash it ++ __ sub(sp, sp, 2 * wordSize); ++ __ sd(x10, Address(sp, 0)); // save exception ++ __ sd(ra, Address(sp, wordSize)); // save return address ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, ra); ++ __ mv(x11, x10); // save exception handler ++ __ ld(x10, 
Address(sp, 0)); // restore exception ++ __ ld(ra, Address(sp, wordSize)); // restore return address ++ __ add(sp, sp, 2 * wordSize); ++ // We might be returning to a deopt handler that expects x13 to ++ // contain the exception pc ++ __ mv(x13, ra); ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(x11); // jump to exception ++ // handler of caller ++} + -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); + -+ __ mv(x9, x10); // save the nmethod ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ load_earlyret_value(state); + -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); + -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); ++ // Clear the earlyret state ++ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); ++ __ sd(zr, cond_addr); + -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); ++ __ remove_activation(state, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ ret(); + -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } -+ } ++ return entry; ++} ++// end of ForceEarlyReturn support ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation + ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ aep = __ pc(); __ push_ptr(); __ j(L); ++ fep = __ pc(); __ push_f(); __ j(L); ++ dep = __ pc(); __ push_d(); __ j(L); ++ lep = __ pc(); __ push_l(); __ j(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push_i(); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); +} + -+void TemplateTable::if_0cmp(Condition cc) -+{ -+ transition(itos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; ++//----------------------------------------------------------------------------- + -+ __ addw(x10, x10, zr); -+ switch (cc) { -+ case equal: -+ __ bnez(x10, not_taken); -+ break; -+ case not_equal: -+ __ beqz(x10, not_taken); -+ break; -+ case less: -+ __ bgez(x10, not_taken); -+ break; -+ case less_equal: -+ __ bgtz(x10, not_taken); -+ break; -+ case greater: -+ __ blez(x10, not_taken); -+ break; -+ case greater_equal: -+ __ bltz(x10, not_taken); -+ break; -+ default: -+ break; -+ } -+ -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); -+} ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ 
pc(); + -+void TemplateTable::if_icmp(Condition cc) -+{ -+ transition(itos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ __ pop_i(x11); -+ __ addw(x10, x10, zr); -+ switch (cc) { -+ case equal: -+ __ bne(x11, x10, not_taken); -+ break; -+ case not_equal: -+ __ beq(x11, x10, not_taken); -+ break; -+ case less: -+ __ bge(x11, x10, not_taken); -+ break; -+ case less_equal: -+ __ bgt(x11, x10, not_taken); -+ break; -+ case greater: -+ __ ble(x11, x10, not_taken); -+ break; -+ case greater_equal: -+ __ blt(x11, x10, not_taken); -+ break; -+ default: -+ break; -+ } ++ __ push_reg(ra); ++ __ push(state); ++ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ mv(c_rarg2, x10); // Pass itos ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); ++ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ pop(state); ++ __ pop_reg(ra); ++ __ ret(); // return from result handler + -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++ return entry; +} + -+void TemplateTable::if_nullcmp(Condition cc) -+{ -+ transition(atos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ if (cc == equal) { -+ __ bnez(x10, not_taken); -+ } else { -+ __ beqz(x10, not_taken); -+ } -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ push_reg(t0); ++ __ push_reg(x10); ++ __ mv(x10, (address) &BytecodeCounter::_counter_value); ++ __ li(t0, 1); ++ __ amoadd_d(zr, x10, t0, Assembler::aqrl); ++ __ pop_reg(x10); ++ __ pop_reg(t0); +} + -+void TemplateTable::if_acmp(Condition cc) -+{ -+ transition(atos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ __ pop_ptr(x11); ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } + -+ if (cc == equal) { -+ __ oop_nequal(x11, x10, not_taken); -+ } else if (cc == not_equal) { -+ __ oop_equal(x11, x10, not_taken); -+ } -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); -+} ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } + -+void TemplateTable::ret() { -+ transition(vtos, vtos); -+ // We might be moving to a safepoint. The thread which calls -+ // Interpreter::notice_safepoints() will effectively flush its cache -+ // when it makes a system call, but we need to do something to -+ // ensure that we see the changed dispatch table. -+ __ membar(MacroAssembler::LoadLoad); ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. 
+ -+ locals_index(x11); -+ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp -+ __ profile_ret(x11, x12); -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); -+ __ add(xbcp, xbcp, x11); -+ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); -+ __ dispatch_next(vtos, 0, /*generate_poll*/true); ++ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); ++ __ jal(Interpreter::trace_code(t->tos_in())); ++ __ reinit_heapbase(); +} + -+void TemplateTable::wide_ret() { -+ transition(vtos, vtos); -+ locals_index_wide(x11); -+ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp -+ __ profile_ret(x11, x12); -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); -+ __ add(xbcp, xbcp, x11); -+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); -+ __ dispatch_next(vtos, 0, /*generate_poll*/true); ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ push_reg(t0); ++ __ mv(t0, (address) &BytecodeCounter::_counter_value); ++ __ ld(t0, Address(t0)); ++ __ mv(t1, StopInterpreterAt); ++ __ bne(t0, t1, L); ++ __ ebreak(); ++ __ bind(L); ++ __ pop_reg(t0); +} + -+void TemplateTable::tableswitch() { -+ Label default_case, continue_execution; -+ transition(itos, vtos); -+ // align xbcp -+ __ la(x11, at_bcp(BytesPerInt)); -+ __ andi(x11, x11, -BytesPerInt); -+ // load lo & hi -+ __ lwu(x12, Address(x11, BytesPerInt)); -+ __ lwu(x13, Address(x11, 2 * BytesPerInt)); -+ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ // check against lo & hi -+ __ blt(x10, x12, default_case); -+ __ bgt(x10, x13, default_case); -+ // lookup dispatch offset -+ __ subw(x10, x10, x12); -+ __ shadd(x13, x10, x11, t0, 2); -+ __ lwu(x13, Address(x13, 3 * BytesPerInt)); -+ __ profile_switch_case(x10, x11, x12); -+ // continue execution -+ __ bind(continue_execution); -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ __ add(xbcp, xbcp, x13); -+ __ load_unsigned_byte(t0, Address(xbcp)); -+ __ dispatch_only(vtos, /*generate_poll*/true); -+ // handle default -+ __ bind(default_case); -+ __ profile_switch_default(x10); -+ __ lwu(x13, Address(x11, 0)); -+ __ j(continue_execution); ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +new file mode 100644 +index 00000000000..d2a301c6e74 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -0,0 +1,3951 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/powerOfTwo.hpp" ++ ++#define __ _masm-> ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); +} + -+void TemplateTable::lookupswitch() { -+ transition(itos, itos); -+ __ stop("lookupswitch bytecode should have been rewritten"); ++static inline Address laddress(int n) { ++ return iaddress(n + 1); +} + -+void TemplateTable::fast_linearswitch() { -+ transition(itos, vtos); -+ Label loop_entry, loop, found, continue_execution; -+ // bswap x10 so we can avoid bswapping the table entries -+ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend -+ // align xbcp -+ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of -+ // this instruction (change offsets -+ // below) -+ __ andi(x9, x9, -BytesPerInt); -+ // set counter -+ __ lwu(x11, Address(x9, BytesPerInt)); -+ __ revb_w(x11, x11); -+ __ j(loop_entry); -+ // table search -+ __ bind(loop); -+ __ shadd(t0, x11, x9, t0, 3); -+ __ lw(t0, Address(t0, 2 * BytesPerInt)); -+ __ beq(x10, t0, found); -+ __ bind(loop_entry); -+ __ addi(x11, x11, -1); -+ __ bgez(x11, loop); -+ // default case -+ __ profile_switch_default(x10); -+ __ lwu(x13, Address(x9, 0)); -+ __ j(continue_execution); -+ // entry found -> get offset -+ __ bind(found); -+ __ shadd(t0, x11, x9, t0, 3); -+ __ lwu(x13, Address(t0, 3 * BytesPerInt)); -+ __ profile_switch_case(x11, x10, x9); -+ // continue execution -+ __ bind(continue_execution); -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ __ add(xbcp, xbcp, x13); -+ __ lbu(t0, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++static inline Address faddress(int n) { ++ return iaddress(n); +} + -+void TemplateTable::fast_binaryswitch() { -+ transition(itos, vtos); -+ // Implementation using the following core algorithm: -+ // -+ // int binary_search(int key, LookupswitchPair* array, int n) -+ // binary_search start: -+ // #Binary search according to "Methodik des Programmierens" by -+ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
-+ // int i = 0; -+ // int j = n; -+ // while (i + 1 < j) do -+ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) -+ // # with Q: for all i: 0 <= i < n: key < a[i] -+ // # where a stands for the array and assuming that the (inexisting) -+ // # element a[n] is infinitely big. -+ // int h = (i + j) >> 1 -+ // # i < h < j -+ // if (key < array[h].fast_match()) -+ // then [j = h] -+ // else [i = h] -+ // end -+ // # R: a[i] <= key < a[i+1] or Q -+ // # (i.e., if key is within array, i is the correct index) -+ // return i -+ // binary_search end ++static inline Address daddress(int n) { ++ return laddress(n); ++} + ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} + -+ // Register allocation -+ const Register key = x10; // already set (tosca) -+ const Register array = x11; -+ const Register i = x12; -+ const Register j = x13; -+ const Register h = x14; -+ const Register temp = x15; ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, 0); ++} + -+ // Find array start -+ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to -+ // get rid of this -+ // instruction (change -+ // offsets below) -+ __ andi(array, array, -BytesPerInt); ++static inline Address laddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, Interpreter::local_offset_in_bytes(1));; ++} + -+ // Initialize i & j -+ __ mv(i, zr); // i = 0 -+ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) ++static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} + -+ // Convert j into native byteordering -+ __ revb_w(j, j); ++static inline Address daddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ return laddress(r, temp, _masm); ++} + -+ // And start -+ Label entry; -+ __ j(entry); ++static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} + -+ // binary search loop -+ { -+ Label loop; -+ __ bind(loop); -+ __ addw(h, i, j); // h = i + j -+ __ srliw(h, h, 1); // h = (i + j) >> 1 -+ // if [key < array[h].fast_match()] -+ // then [j = h] -+ // else [i = h] -+ // Convert array[h].match to native byte-ordering before compare -+ __ shadd(temp, h, array, temp, 3); -+ __ ld(temp, Address(temp, 0)); -+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} + -+ Label L_done, L_greater; -+ __ bge(key, temp, L_greater); -+ // if [key < array[h].fast_match()] then j = h -+ __ mv(j, h); -+ __ j(L_done); -+ __ bind(L_greater); -+ // if [key >= array[h].fast_match()] then i = h -+ __ mv(i, h); -+ __ bind(L_done); ++// At top of Java expression stack which may be different than esp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ return Address(esp, Interpreter::expr_offset_in_bytes(0)); ++} + -+ // while [i + 1 < j] -+ __ bind(entry); -+ __ addiw(h, i, 1); // i + 1 -+ __ blt(h, j, loop); // i + 1 < j -+ } ++static inline Address at_tos_p1() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(1)); ++} + -+ // end of binary search, result index is i (must check again!) 
-+ Label default_case; -+ // Convert array[i].match to native byte-ordering before compare -+ __ shadd(temp, i, array, temp, 3); -+ __ ld(temp, Address(temp, 0)); -+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend -+ __ bne(key, temp, default_case); ++static inline Address at_tos_p2() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(2)); ++} + -+ // entry found -> j = offset -+ __ shadd(temp, i, array, temp, 3); -+ __ lwu(j, Address(temp, BytesPerInt)); -+ __ profile_switch_case(i, key, array); -+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++static inline Address at_tos_p3() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(3)); ++} + -+ __ add(temp, xbcp, j); -+ __ load_unsigned_byte(t0, Address(temp, 0)); ++static inline Address at_tos_p4() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(4)); ++} + -+ __ add(xbcp, xbcp, j); -+ __ la(xbcp, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++static inline Address at_tos_p5() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(5)); ++} + -+ // default case -> j = default offset -+ __ bind(default_case); -+ __ profile_switch_default(i); -+ __ lwu(j, Address(array, -2 * BytesPerInt)); -+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the Address described by obj. ++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators) { ++ assert(val == noreg || val == x10, "parameter is just for looks"); ++ assert_cond(_masm != NULL); ++ __ store_heap_oop(dst, val, x29, x11, decorators); ++} + -+ __ add(temp, xbcp, j); -+ __ load_unsigned_byte(t0, Address(temp, 0)); ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators) { ++ assert_cond(_masm != NULL); ++ __ load_heap_oop(dst, src, x7, x11, decorators); ++} + -+ __ add(xbcp, xbcp, j); -+ __ la(xbcp, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(xbcp, offset); +} + -+void TemplateTable::_return(TosState state) ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) +{ -+ transition(state, state); -+ assert(_desc->calls_vm(), -+ "inconsistent calls_vm information"); // call in remove_activation -+ -+ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { -+ assert(state == vtos, "only valid state"); -+ -+ __ ld(c_rarg1, aaddress(0)); -+ __ load_klass(x13, c_rarg1); -+ __ lwu(x13, Address(x13, Klass::access_flags_offset())); -+ Label skip_register_finalizer; -+ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); -+ __ beqz(t0, skip_register_finalizer); -+ -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); ++ if (!RewriteBytecodes) { return; } ++ Label L_patch_done; + -+ __ bind(skip_register_finalizer); ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: // fall through ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_dputfield: // fall through ++ case Bytecodes::_fast_fputfield: // fall through ++ case Bytecodes::_fast_iputfield: 
// fall through ++ case Bytecodes::_fast_lputfield: // fall through ++ case Bytecodes::_fast_sputfield: { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); ++ __ mv(bc_reg, bc); ++ __ beqz(temp_reg, L_patch_done); ++ break; ++ } ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ mv(bc_reg, bc); ++ } + } + -+ // Issue a StoreStore barrier after all stores but before return -+ // from any constructor for any class with a final field. We don't -+ // know if this is a finalizer, so we always do so. -+ if (_desc->bytecode() == Bytecodes::_return) { -+ __ membar(MacroAssembler::StoreStore); ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. ++ __ bnez(temp_reg, L_fast_patch); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); ++ __ j(L_patch_done); ++ __ bind(L_fast_patch); + } + -+ // Narrow result if state is itos but result type is smaller. -+ // Need to narrow in the return bytecode rather than in generate_return_entry -+ // since compiled code callers expect the result to already be narrowed. -+ if (state == itos) { -+ __ narrow(x10); -+ } ++#ifdef ASSERT ++ Label L_okay; ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ beq(temp_reg, bc_reg, L_okay); ++ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); ++ __ beqz(temp_reg, L_okay); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif + -+ __ remove_activation(state); -+ __ ret(); ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); +} + ++// Individual instructions + -+// ---------------------------------------------------------------------------- -+// Volatile variables demand their effects be made known to all CPU's -+// in order. Store buffers on most chips allow reads & writes to -+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode -+// without some kind of memory barrier (i.e., it's not sufficient that -+// the interpreter does not reorder volatile references, the hardware -+// also must not reorder them). -+// -+// According to the new Java Memory Model (JMM): -+// (1) All volatiles are serialized wrt to each other. ALSO reads & -+// writes act as aquire & release, so: -+// (2) A read cannot let unrelated NON-volatile memory refs that -+// happen after the read float up to before the read. It's OK for -+// non-volatile memory refs that happen before the volatile read to -+// float down below it. -+// (3) Similar a volatile write cannot let unrelated NON-volatile -+// memory refs that happen BEFORE the write float down to after the -+// write. It's OK for non-volatile memory refs that happen after the -+// volatile write to float up before it. 
-+// -+// We only put in barriers around volatile refs (they are expensive), -+// not _between_ memory refs (that would require us to track the -+// flavor of the previous memory refs). Requirements (2) and (3) -+// require some barriers before volatile stores and after volatile -+// loads. These nearly cover requirement (1) but miss the -+// volatile-store-volatile-load case. This final case is placed after -+// volatile-stores although it could just as well go before -+// volatile-loads. -+ -+void TemplateTable::resolve_cache_and_index(int byte_no, -+ Register Rcache, -+ Register index, -+ size_t index_size) { -+ const Register temp = x9; -+ assert_different_registers(Rcache, index, temp); -+ -+ Label resolved; -+ -+ Bytecodes::Code code = bytecode(); -+ switch (code) { -+ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; -+ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; -+ default: break; -+ } -+ -+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); -+ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); -+ __ mv(t0, (int) code); -+ __ beq(temp, t0, resolved); -+ -+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); -+ __ mv(temp, (int) code); -+ __ call_VM(noreg, entry, temp); -+ -+ // Update registers with resolved info -+ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); -+ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset -+ // so all clients ofthis method must be modified accordingly -+ __ bind(resolved); ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do +} + -+// The Rcache and index registers must be set before call -+// n.b unlike x86 cache already includes the index offset -+void TemplateTable::load_field_cp_cache_entry(Register obj, -+ Register cache, -+ Register index, -+ Register off, -+ Register flags, -+ bool is_static = false) { -+ assert_different_registers(cache, index, flags, off); -+ -+ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); -+ // Field offset -+ __ ld(off, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::f2_offset()))); -+ // Flags -+ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::flags_offset()))); ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("should not reach here bytecode"); ++} + -+ // klass overwrite register -+ if (is_static) { -+ __ ld(obj, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::f1_offset()))); -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ __ ld(obj, Address(obj, mirror_offset)); -+ __ resolve_oop_handle(obj); -+ } ++void TemplateTable::aconst_null() ++{ ++ transition(vtos, atos); ++ __ mv(x10, zr); +} + -+void TemplateTable::load_invoke_cp_cache_entry(int byte_no, -+ Register method, -+ Register itable_index, -+ Register flags, -+ bool is_invokevirtual, -+ bool is_invokevfinal, /*unused*/ -+ bool is_invokedynamic) { -+ // setup registers -+ const Register cache = t1; -+ const Register index = x14; -+ assert_different_registers(method, flags); -+ assert_different_registers(method, cache, index); -+ assert_different_registers(itable_index, flags); -+ assert_different_registers(itable_index, cache, index); -+ // determine constant pool cache field offsets -+ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); -+ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + -+ 
(is_invokevirtual ? -+ ConstantPoolCacheEntry::f2_offset() : -+ ConstantPoolCacheEntry::f1_offset())); -+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()); -+ // access constant pool cache fields -+ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()); ++void TemplateTable::iconst(int value) ++{ ++ transition(vtos, itos); ++ __ li(x10, value); ++} + -+ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); -+ resolve_cache_and_index(byte_no, cache, index, index_size); -+ __ ld(method, Address(cache, method_offset)); ++void TemplateTable::lconst(int value) ++{ ++ transition(vtos, ltos); ++ __ li(x10, value); ++} + -+ if (itable_index != noreg) { -+ __ ld(itable_index, Address(cache, index_offset)); ++void TemplateTable::fconst(int value) ++{ ++ transition(vtos, ftos); ++ static float fBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)fBuf); ++ switch (value) { ++ case 0: ++ __ fmv_w_x(f10, zr); ++ break; ++ case 1: ++ __ flw(f10, t0, 0); ++ break; ++ case 2: ++ __ flw(f10, t0, sizeof(float)); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ __ lwu(flags, Address(cache, flags_offset)); +} + -+// The registers cache and index expected to be set before call. -+// Correct values of the cache and index registers are preserved. -+void TemplateTable::jvmti_post_field_access(Register cache, Register index, -+ bool is_static, bool has_tos) { -+ // do the JVMTI work here to avoid disturbing the register state below -+ // We use c_rarg registers here beacause we want to use the register used in -+ // the call to the VM -+ if (JvmtiExport::can_post_field_access()) { -+ // Check to see if a field access watch has been set before we -+ // take the time to call into the VM. -+ Label L1; -+ assert_different_registers(cache, index, x10); -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); -+ __ lwu(x10, Address(t0, offset)); -+ -+ __ beqz(x10, L1); -+ -+ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); -+ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); -+ -+ if (is_static) { -+ __ mv(c_rarg1, zr); // NULL object reference -+ } else { -+ __ ld(c_rarg1, at_tos()); // get object pointer without popping it -+ __ verify_oop(c_rarg1); -+ } -+ // c_rarg1: object pointer or NULL -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_access), -+ c_rarg1, c_rarg2, c_rarg3); -+ __ get_cache_and_index_at_bcp(cache, index, 1); -+ __ bind(L1); ++void TemplateTable::dconst(int value) ++{ ++ transition(vtos, dtos); ++ static double dBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)dBuf); ++ switch (value) { ++ case 0: ++ __ fmv_d_x(f10, zr); ++ break; ++ case 1: ++ __ fld(f10, t0, 0); ++ break; ++ case 2: ++ __ fld(f10, t0, sizeof(double)); ++ break; ++ default: ++ ShouldNotReachHere(); + } +} + -+void TemplateTable::pop_and_check_object(Register r) ++void TemplateTable::bipush() +{ -+ __ pop_ptr(r); -+ __ null_check(r); // for field access must check obj. 
-+ __ verify_oop(r); ++ transition(vtos, itos); ++ __ load_signed_byte(x10, at_bcp(1)); +} + -+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) ++void TemplateTable::sipush() +{ -+ const Register cache = x12; -+ const Register index = x13; -+ const Register obj = x14; -+ const Register off = x9; -+ const Register flags = x10; -+ const Register raw_flags = x16; -+ const Register bc = x14; // uses same reg as obj, so don't mix them -+ -+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); -+ jvmti_post_field_access(cache, index, is_static, false); -+ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); ++ transition(vtos, itos); ++ __ load_unsigned_short(x10, at_bcp(1)); ++ __ revb_w_w(x10, x10); ++ __ sraiw(x10, x10, 16); ++} + -+ if (!is_static) { -+ // obj is on the stack -+ pop_and_check_object(obj); -+ } ++void TemplateTable::ldc(bool wide) ++{ ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(x11, 1); ++ } else { ++ __ load_unsigned_byte(x11, at_bcp(1)); + } ++ __ get_cpool_and_tags(x12, x10); + -+ __ add(off, obj, off); -+ const Address field(off); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); + -+ Label Done, notByte, notBool, notInt, notShort, notChar, -+ notLong, notFloat, notObj, notDouble; ++ // get type ++ __ addi(x13, x11, tags_offset); ++ __ add(x13, x10, x13); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x13, Address(x13, 0)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + -+ ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++ // unresolved class - get the resolved class ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); ++ __ beq(x13, t1, call_ldc); + -+ assert(btos == 0, "change code, btos != 0"); -+ __ bnez(flags, notByte); ++ // unresolved class in error state - call into runtime to throw the error ++ // from the first resolution attempt ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(x13, t1, call_ldc); + -+ // Dont't rewrite getstatic, only getfield -+ if (is_static) { -+ rc = may_not_rewrite; -+ } ++ // resolved class - need to call vm to get java mirror of the class ++ __ mv(t1, (u1)JVM_CONSTANT_Class); ++ __ bne(x13, t1, notClass); + -+ // btos -+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); -+ __ push(btos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); -+ } ++ __ bind(call_ldc); ++ __ mv(c_rarg1, wide); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); ++ __ push_ptr(x10); ++ __ verify_oop(x10); + __ j(Done); + -+ __ bind(notByte); -+ __ sub(t0, flags, (u1)ztos); -+ __ bnez(t0, notBool); ++ __ bind(notClass); ++ __ mv(t1, (u1)JVM_CONSTANT_Float); ++ __ bne(x13, t1, notFloat); + -+ // ztos (same code as btos) -+ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ztos); -+ // Rewirte bytecode to be faster -+ if (rc == may_rewrite) { -+ // uses btos rewriting, no truncating to t/f bit is needed for 
getfield -+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); -+ } ++ // ftos ++ __ shadd(x11, x11, x12, x11, 3); ++ __ flw(f10, Address(x11, base_offset)); ++ __ push_f(f10); + __ j(Done); + -+ __ bind(notBool); -+ __ sub(t0, flags, (u1)atos); -+ __ bnez(t0, notObj); -+ // atos -+ do_oop_load(_masm, field, x10, IN_HEAP); -+ __ push(atos); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); -+ } -+ __ j(Done); ++ __ bind(notFloat); ++ ++ __ mv(t1, (u1)JVM_CONSTANT_Integer); ++ __ bne(x13, t1, notInt); + -+ __ bind(notObj); -+ __ sub(t0, flags, (u1)itos); -+ __ bnez(t0, notInt); + // itos -+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ __ push(itos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); -+ } ++ __ shadd(x11, x11, x12, x11, 3); ++ __ lw(x10, Address(x11, base_offset)); ++ __ push_i(x10); + __ j(Done); + + __ bind(notInt); -+ __ sub(t0, flags, (u1)ctos); -+ __ bnez(t0, notChar); -+ // ctos -+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ctos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); -+ } -+ __ j(Done); ++ condy_helper(Done); + -+ __ bind(notChar); -+ __ sub(t0, flags, (u1)stos); -+ __ bnez(t0, notShort); -+ // stos -+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); -+ __ push(stos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); -+ } -+ __ j(Done); ++ __ bind(Done); ++} + -+ __ bind(notShort); -+ __ sub(t0, flags, (u1)ltos); -+ __ bnez(t0, notLong); -+ // ltos -+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ltos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11); -+ } -+ __ j(Done); ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) ++{ ++ transition(vtos, atos); + -+ __ bind(notLong); -+ __ sub(t0, flags, (u1)ftos); -+ __ bnez(t0, notFloat); -+ // ftos -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ __ push(ftos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); -+ } -+ __ j(Done); ++ const Register result = x10; ++ const Register tmp = x11; ++ const Register rarg = x12; + -+ __ bind(notFloat); -+#ifdef ASSERT -+ __ sub(t0, flags, (u1)dtos); -+ __ bnez(t0, notDouble); -+#endif -+ // dtos -+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ __ push(dtos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); -+ } -+#ifdef ASSERT -+ __ j(Done); ++ const int index_size = wide ? sizeof(u2) : sizeof(u1); + -+ __ bind(notDouble); -+ __ stop("Bad state"); -+#endif ++ Label resolved; + -+ __ bind(Done); ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) 
++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bnez(result, resolved); + -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ ++ // first time invocation - must resolve first ++ __ mv(rarg, (int)bytecode()); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. ++ Label notNull; ++ ++ // Stash null_sentinel address to get its value later ++ int32_t offset = 0; ++ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); ++ __ ld(tmp, Address(rarg, offset)); ++ __ resolve_oop_handle(tmp); ++ __ bne(result, tmp, notNull); ++ __ mv(result, zr); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ // Safe to call with 0 result ++ __ verify_oop(result); ++ } +} + -+void TemplateTable::getfield(int byte_no) ++void TemplateTable::ldc2_w() +{ -+ getfield_or_static(byte_no, false); -+} ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ __ get_unsigned_2_byte_index_at_bcp(x10, 1); + -+void TemplateTable::nofast_getfield(int byte_no) { -+ getfield_or_static(byte_no, false, may_not_rewrite); ++ __ get_cpool_and_tags(x11, x12); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add(x12, x12, x10); ++ __ load_unsigned_byte(x12, Address(x12, tags_offset)); ++ __ mv(t1, JVM_CONSTANT_Double); ++ __ bne(x12, t1, notDouble); ++ ++ // dtos ++ __ shadd(x12, x10, x11, x12, 3); ++ __ fld(f10, Address(x12, base_offset)); ++ __ push_d(f10); ++ __ j(Done); ++ ++ __ bind(notDouble); ++ __ mv(t1, (int)JVM_CONSTANT_Long); ++ __ bne(x12, t1, notLong); ++ ++ // ltos ++ __ shadd(x10, x10, x11, x10, 3); ++ __ ld(x10, Address(x10, base_offset)); ++ __ push_l(x10); ++ __ j(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ __ bind(Done); +} + -+void TemplateTable::getstatic(int byte_no) ++void TemplateTable::condy_helper(Label& Done) +{ -+ getfield_or_static(byte_no, true); -+} ++ const Register obj = x10; ++ const Register rarg = x11; ++ const Register flags = x12; ++ const Register off = x13; + -+// The registers cache and index expected to be set before call. -+// The function may destroy various registers, just not the cache and index registers. -+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { -+ transition(vtos, vtos); ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + -+ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ __ mv(rarg, (int) bytecode()); ++ __ call_VM(obj, entry, rarg); + -+ if (JvmtiExport::can_post_field_modification()) { -+ // Check to see if a field modification watch has been set before -+ // we take the time to call into the VM. 
-+ Label L1; -+ assert_different_registers(cache, index, x10); -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); -+ __ lwu(x10, Address(t0, offset)); -+ __ beqz(x10, L1); ++ __ get_vm_result_2(flags, xthread); + -+ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ mv(off, flags); ++ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); ++ __ andrw(off, off, t0); + -+ if (is_static) { -+ // Life is simple. Null out the object pointer. -+ __ mv(c_rarg1, zr); -+ } else { -+ // Life is harder. The stack holds the value on top, followed by -+ // the object. We don't know the size of the value, though; it -+ // could be one or two words depending on its type. As a result, -+ // we must find the type to determine where the object is. -+ __ lwu(c_rarg3, Address(c_rarg2, -+ in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); -+ ConstantPoolCacheEntry::verify_tos_state_shift(); -+ Label nope2, done, ok; -+ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue -+ __ sub(t0, c_rarg3, ltos); -+ __ beqz(t0, ok); -+ __ sub(t0, c_rarg3, dtos); -+ __ bnez(t0, nope2); -+ __ bind(ok); -+ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); -+ __ bind(nope2); -+ } -+ // cache entry pointer -+ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); -+ // object (tos) -+ __ mv(c_rarg3, esp); -+ // c_rarg1: object pointer set up above (NULL if static) -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_modification), -+ c_rarg1, c_rarg2, c_rarg3); -+ __ get_cache_and_index_at_bcp(cache, index, 1); -+ __ bind(L1); -+ } -+} -+ -+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { -+ transition(vtos, vtos); -+ -+ const Register cache = x12; -+ const Register index = x13; -+ const Register obj = x12; -+ const Register off = x9; -+ const Register flags = x10; -+ const Register bc = x14; -+ -+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); -+ jvmti_post_field_mod(cache, index, is_static); -+ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); -+ -+ Label Done; -+ __ mv(x15, flags); ++ __ add(off, obj, off); ++ const Address field(off, 0); // base + R---->base + offset + -+ { -+ Label notVolatile; -+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 + -+ Label notByte, notBool, notInt, notShort, notChar, -+ notLong, notFloat, notObj, notDouble; ++ switch (bytecode()) { ++ case Bytecodes::_ldc: // fall through ++ case Bytecodes::_ldc_w: { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ mv(t1, itos); ++ __ bne(flags, t1, notInt); ++ // itos ++ __ lw(x10, field); ++ __ push(itos); ++ __ j(Done); + -+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + -+ 
ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++ __ bind(notInt); ++ __ mv(t1, ftos); ++ __ bne(flags, t1, notFloat); ++ // ftos ++ __ load_float(field); ++ __ push(ftos); ++ __ j(Done); + -+ assert(btos == 0, "change code, btos != 0"); -+ __ bnez(flags, notByte); ++ __ bind(notFloat); ++ __ mv(t1, stos); ++ __ bne(flags, t1, notShort); ++ // stos ++ __ load_signed_short(x10, field); ++ __ push(stos); ++ __ j(Done); + -+ // Don't rewrite putstatic, only putfield -+ if (is_static) { -+ rc = may_not_rewrite; -+ } ++ __ bind(notShort); ++ __ mv(t1, btos); ++ __ bne(flags, t1, notByte); ++ // btos ++ __ load_signed_byte(x10, field); ++ __ push(btos); ++ __ j(Done); + -+ // btos -+ { -+ __ pop(btos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); // off register as temparator register. -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ __ bind(notByte); ++ __ mv(t1, ctos); ++ __ bne(flags, t1, notChar); ++ // ctos ++ __ load_unsigned_short(x10, field); ++ __ push(ctos); ++ __ j(Done); + -+ __ bind(notByte); -+ __ sub(t0, flags, (u1)ztos); -+ __ bnez(t0, notBool); ++ __ bind(notChar); ++ __ mv(t1, ztos); ++ __ bne(flags, t1, notBool); ++ // ztos ++ __ load_signed_byte(x10, field); ++ __ push(ztos); ++ __ j(Done); + -+ // ztos -+ { -+ __ pop(ztos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); ++ __ bind(notBool); ++ break; + } -+ __ j(Done); -+ } -+ -+ __ bind(notBool); -+ __ sub(t0, flags, (u1)atos); -+ __ bnez(t0, notObj); + -+ // atos -+ { -+ __ pop(atos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ // Store into the field -+ do_oop_store(_masm, field, x10, IN_HEAP); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ case Bytecodes::_ldc2_w: { ++ Label notLong, notDouble; ++ __ mv(t1, ltos); ++ __ bne(flags, t1, notLong); ++ // ltos ++ __ ld(x10, field); ++ __ push(ltos); ++ __ j(Done); + -+ __ bind(notObj); -+ __ sub(t0, flags, (u1)itos); -+ __ bnez(t0, notInt); ++ __ bind(notLong); ++ __ mv(t1, dtos); ++ __ bne(flags, t1, notDouble); ++ // dtos ++ __ load_double(field); ++ __ push(dtos); ++ __ j(Done); + -+ // itos -+ { -+ __ pop(itos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
-+ const Address field(off, 0); -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); ++ __ bind(notDouble); ++ break; + } -+ __ j(Done); -+ } + -+ __ bind(notInt); -+ __ sub(t0, flags, (u1)ctos); -+ __ bnez(t0, notChar); -+ -+ // ctos -+ { -+ __ pop(ctos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); ++ default: ++ ShouldNotReachHere(); + } + -+ __ bind(notChar); -+ __ sub(t0, flags, (u1)stos); -+ __ bnez(t0, notShort); ++ __ stop("bad ldc/condy"); ++} + -+ // stos -+ { -+ __ pop(stos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++void TemplateTable::locals_index(Register reg, int offset) ++{ ++ __ lbu(reg, at_bcp(offset)); ++ __ neg(reg, reg); ++} + -+ __ bind(notShort); -+ __ sub(t0, flags, (u1)ltos); -+ __ bnez(t0, notLong); ++void TemplateTable::iload() { ++ iload_internal(); ++} + -+ // ltos -+ { -+ __ pop(ltos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} + -+ __ bind(notLong); -+ __ sub(t0, flags, (u1)ftos); -+ __ bnez(t0, notFloat); ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; + -+ // ftos -+ { -+ __ pop(ftos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + -+ __ bind(notFloat); -+#ifdef ASSERT -+ __ sub(t0, flags, (u1)dtos); -+ __ bnez(t0, notDouble); -+#endif ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ mv(t1, Bytecodes::_iload); ++ __ beq(x11, t1, done); + -+ // dtos -+ { -+ __ pop(dtos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
-+ const Address field(off, 0); -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); -+ } -+ } ++ // if _fast_iload rewrite to _fast_iload2 ++ __ mv(t1, Bytecodes::_fast_iload); ++ __ mv(bc, Bytecodes::_fast_iload2); ++ __ beq(x11, t1, rewrite); + -+#ifdef ASSERT -+ __ j(Done); ++ // if _caload rewrite to _fast_icaload ++ __ mv(t1, Bytecodes::_caload); ++ __ mv(bc, Bytecodes::_fast_icaload); ++ __ beq(x11, t1, rewrite); + -+ __ bind(notDouble); -+ __ stop("Bad state"); -+#endif ++ // else rewrite to _fast_iload ++ __ mv(bc, Bytecodes::_fast_iload); + -+ __ bind(Done); ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, bc, x11, false); ++ __ bind(done); + -+ { -+ Label notVolatile; -+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); -+ __ bind(notVolatile); + } ++ ++ // do iload, get the local value into tos ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); +} + -+void TemplateTable::putfield(int byte_no) ++void TemplateTable::fast_iload2() +{ -+ putfield_or_static(byte_no, false); ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++ __ push(itos); ++ locals_index(x11, 3); ++ __ lw(x10, iaddress(x11, x10, _masm)); +} + -+void TemplateTable::nofast_putfield(int byte_no) { -+ putfield_or_static(byte_no, false, may_not_rewrite); ++void TemplateTable::fast_iload() ++{ ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); +} + -+void TemplateTable::putstatic(int byte_no) { -+ putfield_or_static(byte_no, true); ++void TemplateTable::lload() ++{ ++ transition(vtos, ltos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); +} + -+void TemplateTable::jvmti_post_fast_field_mod() ++void TemplateTable::fload() +{ -+ if (JvmtiExport::can_post_field_modification()) { -+ // Check to see if a field modification watch has been set before -+ // we take the time to call into the VM. -+ Label L2; -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); -+ __ lwu(c_rarg3, Address(t0, offset)); -+ __ beqz(c_rarg3, L2); -+ __ pop_ptr(x9); // copy the object pointer from tos -+ __ verify_oop(x9); -+ __ push_ptr(x9); // put the object pointer back on tos -+ // Save tos values before call_VM() clobbers them. Since we have -+ // to do it for every data type, we use the saved values as the -+ // jvalue object. 
-+ switch (bytecode()) { // load values into the jvalue object -+ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_sputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_iputfield: __ push_i(x10); break; -+ case Bytecodes::_fast_dputfield: __ push_d(); break; -+ case Bytecodes::_fast_fputfield: __ push_f(); break; -+ case Bytecodes::_fast_lputfield: __ push_l(x10); break; -+ -+ default: -+ ShouldNotReachHere(); -+ } -+ __ mv(c_rarg3, esp); // points to jvalue on the stack -+ // access constant pool cache entry -+ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); -+ __ verify_oop(x9); -+ // x9: object pointer copied above -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_modification), -+ x9, c_rarg2, c_rarg3); -+ -+ switch (bytecode()) { // restore tos values -+ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_sputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; -+ case Bytecodes::_fast_dputfield: __ pop_d(); break; -+ case Bytecodes::_fast_fputfield: __ pop_f(); break; -+ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; -+ default: break; -+ } -+ __ bind(L2); -+ } ++ transition(vtos, ftos); ++ locals_index(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); +} + -+void TemplateTable::fast_storefield(TosState state) ++void TemplateTable::dload() +{ -+ transition(state, vtos); ++ transition(vtos, dtos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ ByteSize base = ConstantPoolCache::base_offset(); ++void TemplateTable::aload() ++{ ++ transition(vtos, atos); ++ locals_index(x11); ++ __ ld(x10, iaddress(x11, x10, _masm)); + -+ jvmti_post_fast_field_mod(); ++} + -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x11, 1); ++void TemplateTable::locals_index_wide(Register reg) { ++ __ lhu(reg, at_bcp(2)); ++ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend ++ __ neg(reg, reg); ++} + -+ // Must prevent reordering of the following cp cache loads with bytecode load -+ __ membar(MacroAssembler::LoadLoad); ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(x11); ++ __ lw(x10, iaddress(x11, t0, _masm)); ++} + -+ // test for volatile with x13 -+ __ lwu(x13, Address(x12, in_bytes(base + -+ ConstantPoolCacheEntry::flags_offset()))); ++void TemplateTable::wide_lload() ++{ ++ transition(vtos, ltos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ // replace index with field offset from cache entry -+ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); ++void TemplateTable::wide_fload() ++{ ++ transition(vtos, ftos); ++ locals_index_wide(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); ++} + -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << 
ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++void TemplateTable::wide_dload() ++{ ++ transition(vtos, dtos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ // Get object from stack -+ pop_and_check_object(x12); ++void TemplateTable::wide_aload() ++{ ++ transition(vtos, atos); ++ locals_index_wide(x11); ++ __ ld(x10, aaddress(x11, t0, _masm)); ++} + -+ // field address -+ __ add(x11, x12, x11); -+ const Address field(x11, 0); -+ -+ // access field -+ switch (bytecode()) { -+ case Bytecodes::_fast_aputfield: -+ do_oop_store(_masm, field, x10, IN_HEAP); -+ break; -+ case Bytecodes::_fast_lputfield: -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_iputfield: -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_zputfield: -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_bputfield: -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_sputfield: -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_cputfield: -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_fputfield: -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_dputfield: -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); -+ __ bind(notVolatile); ++void TemplateTable::index_check(Register array, Register index) ++{ ++ // destroys x11, t0 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ // sign extend index for use by indexed load ++ // check index ++ const Register length = t0; ++ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); ++ if (index != x11) { ++ assert(x11 != array, "different registers"); ++ __ mv(x11, index); + } ++ Label ok; ++ __ addw(index, index, zr); ++ __ bltu(index, length, ok); ++ __ mv(x13, array); ++ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ jr(t0); ++ __ bind(ok); +} + -+void TemplateTable::fast_accessfield(TosState state) ++void TemplateTable::iaload() +{ -+ transition(atos, state); -+ // Do the JVMTI work here to avoid disturbing the register state below -+ if (JvmtiExport::can_post_field_access()) { -+ // Check to see if a field access watch has been set before we -+ // take the time to call into the VM. 
-+ Label L1; -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); -+ __ lwu(x12, Address(t0, offset)); -+ __ beqz(x12, L1); -+ // access constant pool cache entry -+ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); -+ __ verify_oop(x10); -+ __ push_ptr(x10); // save object pointer before call_VM() clobbers it -+ __ mv(c_rarg1, x10); -+ // c_rarg1: object pointer copied above -+ // c_rarg2: cache entry pointer -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_access), -+ c_rarg1, c_rarg2); -+ __ pop_ptr(x10); // restore object pointer -+ __ bind(L1); -+ } -+ -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x11, 1); -+ -+ // Must prevent reordering of the following cp cache loads with bytecode load -+ __ membar(MacroAssembler::LoadLoad); -+ -+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()))); -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ -+ // x10: object -+ __ verify_oop(x10); -+ __ null_check(x10); -+ __ add(x11, x10, x11); -+ const Address field(x11, 0); ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++} + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } ++void TemplateTable::laload() ++{ ++ transition(itos, ltos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // access field -+ switch (bytecode()) { -+ case Bytecodes::_fast_agetfield: -+ do_oop_load(_masm, field, x10, IN_HEAP); -+ __ verify_oop(x10); -+ break; -+ case Bytecodes::_fast_lgetfield: -+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_igetfield: -+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ break; -+ case Bytecodes::_fast_bgetfield: -+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_sgetfield: -+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_cgetfield: -+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_fgetfield: -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_dgetfield: -+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++void TemplateTable::faload() ++{ ++ 
transition(itos, ftos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+void TemplateTable::fast_xaccess(TosState state) ++void TemplateTable::daload() +{ -+ transition(vtos, state); ++ transition(itos, dtos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // get receiver -+ __ ld(x10, aaddress(0)); -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x13, 2); -+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()))); ++void TemplateTable::aaload() ++{ ++ transition(itos, atos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); ++ do_oop_load(_masm, ++ Address(x10), ++ x10, ++ IS_ARRAY); ++} + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } ++void TemplateTable::baload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ __ shadd(x10, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // make sure exception is reported in correct bcp range (getfield is -+ // next instruction) -+ __ addi(xbcp, xbcp, 1); -+ __ null_check(x10); -+ switch (state) { -+ case itos: -+ __ add(x10, x10, x11); -+ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ break; -+ case atos: -+ __ add(x10, x10, x11); -+ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); -+ __ verify_oop(x10); -+ break; -+ case ftos: -+ __ add(t0, x10, x11); -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void TemplateTable::caload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++// iload followed by caload 
frequent pair ++void TemplateTable::fast_icaload() ++{ ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(x12); ++ __ lw(x11, iaddress(x12, x11, _masm)); ++ __ pop_ptr(x10); + -+ __ sub(xbcp, xbcp, 1); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+//----------------------------------------------------------------------------- -+// Calls -+ -+void TemplateTable::count_calls(Register method, Register temp) ++void TemplateTable::saload() +{ -+ __ call_Unimplemented(); ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+void TemplateTable::prepare_invoke(int byte_no, -+ Register method, // linked method (or i-klass) -+ Register index, // itable index, MethodType, etc. -+ Register recv, // if caller wants to see it -+ Register flags // if caller wants to test it -+ ) { -+ // determine flags -+ const Bytecodes::Code code = bytecode(); -+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; -+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; -+ const bool is_invokehandle = code == Bytecodes::_invokehandle; -+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; -+ const bool is_invokespecial = code == Bytecodes::_invokespecial; -+ const bool load_receiver = (recv != noreg); -+ const bool save_flags = (flags != noreg); -+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); -+ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); -+ assert(flags == noreg || flags == x13, ""); -+ assert(recv == noreg || recv == x12, ""); -+ -+ // setup registers & access constant pool cache -+ if (recv == noreg) { -+ recv = x12; -+ } -+ if (flags == noreg) { -+ flags = x13; -+ } -+ assert_different_registers(method, index, recv, flags); ++void TemplateTable::iload(int n) ++{ ++ transition(vtos, itos); ++ __ lw(x10, iaddress(n)); ++} + -+ // save 'interpreter return address' -+ __ save_bcp(); ++void TemplateTable::lload(int n) ++{ ++ transition(vtos, ltos); ++ __ ld(x10, laddress(n)); ++} + -+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++void TemplateTable::fload(int n) ++{ ++ transition(vtos, ftos); ++ __ flw(f10, faddress(n)); ++} + -+ // maybe push appendix to arguments (just before return address) -+ if (is_invokedynamic || is_invokehandle) { -+ Label L_no_push; -+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); -+ __ beqz(t0, L_no_push); -+ // Push the appendix as a trailing parameter. -+ // This must be done before we get the receiver, -+ // since the parameter_size includes it. -+ __ push_reg(x9); -+ __ mv(x9, index); -+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); -+ __ load_resolved_reference_at_index(index, x9); -+ __ pop_reg(x9); -+ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
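
The array-load templates above (iaload through fast_icaload and saload) fold the array base offset into the scaled index: an add puts base_offset_in_bytes(T) >> shift into the index register, then shadd forms array + (index << shift), which equals array + base_offset + index * element_size as long as the base offset is a multiple of the element size. A standalone C++ sketch of that address arithmetic, with illustrative names and constants rather than HotSpot code:

    #include <cstdint>
    #include <cstdio>

    // Mimics "add index, index, base_offset >> shift" followed by
    // "shadd dest, index, array, tmp, shift" (dest = array + (index << shift)).
    // Valid because base_offset is a multiple of the element size (1 << shift).
    uint64_t element_address(uint64_t array, uint64_t index,
                             uint64_t base_offset, unsigned shift) {
        index += base_offset >> shift;   // fold the base offset into the index
        return array + (index << shift); // scale the index and add the array pointer
    }

    int main() {
        // e.g. an int[] whose elements are assumed to start 16 bytes into the object
        uint64_t addr = element_address(0x1000, 3, 16, 2);
        std::printf("0x%llx\n", (unsigned long long) addr); // 0x1000 + 16 + 3*4 = 0x101c
        return 0;
    }
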
-+ __ bind(L_no_push); -+ } ++void TemplateTable::dload(int n) ++{ ++ transition(vtos, dtos); ++ __ fld(f10, daddress(n)); ++} + -+ // load receiver if needed (note: no return address pushed yet) -+ if (load_receiver) { -+ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 -+ __ shadd(t0, recv, esp, t0, 3); -+ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); -+ __ verify_oop(recv); -+ } ++void TemplateTable::aload(int n) ++{ ++ transition(vtos, atos); ++ __ ld(x10, iaddress(n)); ++} + -+ // compute return type -+ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} + -+ // load return address -+ { -+ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); -+ __ mv(t0, table_addr); -+ __ shadd(t0, t1, t0, t1, 3); -+ __ ld(ra, Address(t0, 0)); -+ } ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); +} + -+void TemplateTable::invokevirtual_helper(Register index, -+ Register recv, -+ Register flags) -+{ -+ // Uses temporary registers x10, x13 -+ assert_different_registers(index, recv, x10, x13); -+ // Test for an invoke of a final method -+ Label notFinal; -+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); -+ __ beqz(t0, notFinal); ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; + -+ const Register method = index; // method must be xmethod -+ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention"); ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + -+ // do the call - the index is actually the method to call -+ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* ++ // if _getfield then wait with rewrite ++ __ mv(t1, Bytecodes::Bytecodes::_getfield); ++ __ beq(x11, t1, done); + -+ // It's final, need a null check here! 
-+ __ null_check(recv); ++ // if _igetfield then rewrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_igetfield); ++ __ mv(bc, Bytecodes::_fast_iaccess_0); ++ __ beq(x11, t1, rewrite); + -+ // profile this call -+ __ profile_final_call(x10); -+ __ profile_arguments_type(x10, method, x14, true); ++ // if _agetfield then rewrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_agetfield); ++ __ mv(bc, Bytecodes::_fast_aaccess_0); ++ __ beq(x11, t1, rewrite); + -+ __ jump_from_interpreted(method); ++ // if _fgetfield then rewrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_fgetfield); ++ __ mv(bc, Bytecodes::_fast_faccess_0); ++ __ beq(x11, t1, rewrite); + -+ __ bind(notFinal); ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); + -+ // get receiver klass -+ __ null_check(recv, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x10, recv); ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); + -+ // profile this call -+ __ profile_virtual_call(x10, xlocals, x13); ++ __ bind(done); ++ } + -+ // get target methodOop & entry point -+ __ lookup_virtual_method(x10, index, method); -+ __ profile_arguments_type(x13, method, x14, true); -+ __ jump_from_interpreted(method); ++ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
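
aload_0_internal above chooses a replacement bytecode based on what follows aload_0: a fast getfield form fuses into the matching fast_xaccess_0, a plain getfield delays the rewrite, and anything else becomes fast_aload_0. A standalone C++ sketch of just that decision; the enum values are placeholders, not HotSpot's Bytecodes numbering:

    #include <cstdio>

    // Placeholder names for the bytecodes involved in the rewrite.
    enum Bc { bc_getfield, bc_fast_igetfield, bc_fast_agetfield, bc_fast_fgetfield,
              bc_fast_iaccess_0, bc_fast_aaccess_0, bc_fast_faccess_0,
              bc_fast_aload_0, bc_no_rewrite };

    // Mirrors the choice made above: fuse with a fast getfield, wait on a plain
    // getfield (a pair may still form once it is resolved), otherwise use fast_aload_0.
    Bc rewrite_for_next(Bc next) {
        switch (next) {
        case bc_getfield:       return bc_no_rewrite;
        case bc_fast_igetfield: return bc_fast_iaccess_0;
        case bc_fast_agetfield: return bc_fast_aaccess_0;
        case bc_fast_fgetfield: return bc_fast_faccess_0;
        default:                return bc_fast_aload_0;
        }
    }

    int main() {
        std::printf("%d %d\n", rewrite_for_next(bc_fast_igetfield), rewrite_for_next(bc_getfield));
        return 0;
    }
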
++ aload(0); +} + -+void TemplateTable::invokevirtual(int byte_no) ++void TemplateTable::istore() +{ -+ transition(vtos, vtos); -+ assert(byte_no == f2_byte, "use this argument"); ++ transition(itos, vtos); ++ locals_index(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); ++} + -+ prepare_invoke(byte_no, xmethod, noreg, x12, x13); ++void TemplateTable::lstore() ++{ ++ transition(ltos, vtos); ++ locals_index(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); ++} + -+ // xmethod: index (actually a Method*) -+ // x12: receiver -+ // x13: flags ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(x11); ++ __ fsw(f10, iaddress(x11, t0, _masm)); ++} + -+ invokevirtual_helper(xmethod, x12, x13); ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); +} + -+void TemplateTable::invokespecial(int byte_no) ++void TemplateTable::astore() +{ + transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); -+ -+ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* -+ x12); // get receiver also for null check -+ __ verify_oop(x12); -+ __ null_check(x12); -+ // do the call -+ __ profile_call(x10); -+ __ profile_arguments_type(x10, xmethod, xbcp, false); -+ __ jump_from_interpreted(xmethod); ++ __ pop_ptr(x10); ++ locals_index(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); +} + -+void TemplateTable::invokestatic(int byte_no) -+{ ++void TemplateTable::wide_istore() { + transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this arugment"); -+ -+ prepare_invoke(byte_no, xmethod); // get f1 Method* -+ // do the call -+ __ profile_call(x10); -+ __ profile_arguments_type(x10, xmethod, x14, false); -+ __ jump_from_interpreted(xmethod); ++ __ pop_i(); ++ locals_index_wide(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); +} + -+void TemplateTable::fast_invokevfinal(int byte_no) -+{ -+ __ call_Unimplemented(); ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(); ++ locals_index_wide(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); +} + -+void TemplateTable::invokeinterface(int byte_no) { ++void TemplateTable::wide_fstore() { + transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++ __ pop_f(); ++ locals_index_wide(x11); ++ __ fsw(f10, faddress(x11, t0, _masm)); ++} + -+ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* -+ x12, x13); // recv, flags ++void TemplateTable::wide_dstore() { ++ transition(vtos, vtos); ++ __ pop_d(); ++ locals_index_wide(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); ++} + -+ // x10: interface klass (from f1) -+ // xmethod: method (from f2) -+ // x12: receiver -+ // x13: flags ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ locals_index_wide(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); ++} + -+ // First check for Object case, then private interface method, -+ // then regular interface method. ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ // Special case of invokeinterface called for virtual method of -+ // java.lang.Object. 
See cpCache.cpp for details -+ Label notObjectMethod; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); -+ __ beqz(t0, notObjectMethod); ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ invokevirtual_helper(xmethod, x12, x13); -+ __ bind(notObjectMethod); ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); ++} + -+ Label no_such_interface; ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); ++} + -+ // Check for private method invocation - indicated by vfinal -+ Label notVFinal; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); -+ __ beqz(t0, notVFinal); ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(x10, at_tos()); // value ++ __ ld(x12, at_tos_p1()); // index ++ __ ld(x13, at_tos_p2()); // array + -+ // Check receiver klass into x13 - also a null check -+ __ null_check(x12, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x13, x12); ++ index_check(x13, x12); // kills x11 ++ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); + -+ Label subtype; -+ __ check_klass_subtype(x13, x10, x14, subtype); -+ // If we get here the typecheck failed -+ __ j(no_such_interface); -+ __ bind(subtype); ++ Address element_address(x14, 0); + -+ __ profile_final_call(x10); -+ __ profile_arguments_type(x10, xmethod, x14, true); -+ __ jump_from_interpreted(xmethod); ++ // do array store check - check for NULL value first ++ __ beqz(x10, is_null); + -+ __ bind(notVFinal); ++ // Move subklass into x11 ++ __ load_klass(x11, x10); ++ // Move superklass into x10 ++ __ load_klass(x10, x13); ++ __ ld(x10, Address(x10, ++ ObjArrayKlass::element_klass_offset())); ++ // Compress array + index * oopSize + 12 into a single register. Frees x12. + -+ // Get receiver klass into x13 - also a null check -+ __ restore_locals(); -+ __ null_check(x12, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x13, x12); ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x11. ++ __ gen_subtype_check(x11, ok_is_subtype); //todo + -+ Label no_such_method; ++ // Come here on failure ++ // object is at TOS ++ __ j(Interpreter::_throw_ArrayStoreException_entry); + -+ // Preserve method for the throw_AbstractMethodErrorVerbose. -+ __ mv(x28, xmethod); -+ // Receiver subtype check against REFC. -+ // Superklass in x10. Subklass in x13. 
Blows t1, x30 -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ x13, x10, noreg, -+ // outputs: scan temp. reg, scan temp. reg -+ t1, x30, -+ no_such_interface, -+ /*return_method=*/false); ++ // Come here on success ++ __ bind(ok_is_subtype); + -+ // profile this call -+ __ profile_virtual_call(x13, x30, x9); ++ // Get the value we will store ++ __ ld(x10, at_tos()); ++ // Now store using the appropriate barrier ++ do_oop_store(_masm, element_address, x10, IS_ARRAY); ++ __ j(done); + -+ // Get declaring interface class from method, and itable index -+ __ ld(x10, Address(xmethod, Method::const_offset())); -+ __ ld(x10, Address(x10, ConstMethod::constants_offset())); -+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); -+ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); -+ __ subw(xmethod, xmethod, Method::itable_index_max); -+ __ negw(xmethod, xmethod); ++ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(x12); + -+ // Preserve recvKlass for throw_AbstractMethodErrorVerbose -+ __ mv(xlocals, x13); -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ xlocals, x10, xmethod, -+ // outputs: method, scan temp. reg -+ xmethod, x30, -+ no_such_interface); ++ // Store a NULL ++ do_oop_store(_masm, element_address, noreg, IS_ARRAY); + -+ // xmethod: methodOop to call -+ // x12: receiver -+ // Check for abstract method error -+ // Note: This should be done more efficiently via a throw_abstract_method_error -+ // interpreter entry point and a conditional jump to it in case of a null -+ // method. -+ __ beqz(xmethod, no_such_method); ++ // Pop stack arguments ++ __ bind(done); ++ __ add(esp, esp, 3 * Interpreter::stackElementSize); + -+ __ profile_arguments_type(x13, xmethod, x30, true); ++} + -+ // do the call -+ // x12: receiver -+ // xmethod,: methodOop -+ __ jump_from_interpreted(xmethod); -+ __ should_not_reach_here(); ++void TemplateTable::bastore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 + -+ // exception handling code follows ... -+ // note: must restore interpreter registers to canonical -+ // state for exception handling to work correctly! ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. ++ __ load_klass(x12, x13); ++ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); ++ Label L_skip; ++ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); ++ __ beqz(t0, L_skip); ++ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 ++ __ bind(L_skip); + -+ __ bind(no_such_method); -+ // throw exception -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) -+ // Pass arguments for generating a verbose error message. -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); -+ // the call_VM checks for exception, so we should never return here. 
-+ __ should_not_reach_here(); ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + -+ __ bind(no_such_interface); -+ // throw exceptiong -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) -+ // Pass arguments for generating a verbose error message. -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); -+ // the call_VM checks for exception, so we should never return here. -+ __ should_not_reach_here(); -+ return; ++ __ add(x11, x13, x11); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); +} + -+void TemplateTable::invokehandle(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++void TemplateTable::castore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(t0, x11, x13, t0, 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ prepare_invoke(byte_no, xmethod, x10, x12); -+ __ verify_method_ptr(x12); -+ __ verify_oop(x12); -+ __ null_check(x12); ++void TemplateTable::sastore() ++{ ++ castore(); ++} + -+ // FIXME: profile the LambdaForm also ++void TemplateTable::istore(int n) ++{ ++ transition(itos, vtos); ++ __ sd(x10, iaddress(n)); ++} + -+ // x30 is safe to use here as a temp reg because it is about to -+ // be clobbered by jump_from_interpreted(). -+ __ profile_final_call(x30); -+ __ profile_arguments_type(x30, xmethod, x14, true); ++void TemplateTable::lstore(int n) ++{ ++ transition(ltos, vtos); ++ __ sd(x10, laddress(n)); ++} + -+ __ jump_from_interpreted(xmethod); ++void TemplateTable::fstore(int n) ++{ ++ transition(ftos, vtos); ++ __ fsw(f10, faddress(n)); +} + -+void TemplateTable::invokedynamic(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++void TemplateTable::dstore(int n) ++{ ++ transition(dtos, vtos); ++ __ fsd(f10, daddress(n)); ++} + -+ prepare_invoke(byte_no, xmethod, x10); ++void TemplateTable::astore(int n) ++{ ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ __ sd(x10, iaddress(n)); ++} + -+ // x10: CallSite object (from cpool->resolved_references[]) -+ // xmethod: MH.linkToCallSite method (from f2) ++void TemplateTable::pop() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, Interpreter::stackElementSize); ++} + -+ // Note: x10_callsite is already pushed by prepare_invoke ++void TemplateTable::pop2() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ // %%% should make a type profile for any invokedynamic that takes a ref argument -+ // profile this call -+ __ profile_call(xbcp); -+ __ profile_arguments_type(x13, xmethod, x30, false); ++void TemplateTable::dup() ++{ ++ transition(vtos, vtos); ++ __ ld(x10, Address(esp, 0)); ++ __ push_reg(x10); ++ // stack: ..., a, a ++} + -+ __ verify_oop(x10); ++void TemplateTable::dup_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos()); // load b ++ __ ld(x12, at_tos_p1()); // load a ++ __ sd(x10, at_tos_p1()); // store b ++ __ sd(x12, at_tos()); // store a ++ __ push_reg(x10); // push b ++ // stack: ..., b, a, b ++} + -+ __ jump_from_interpreted(xmethod); ++void 
TemplateTable::dup_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x10, at_tos()); // load c ++ __ ld(x12, at_tos_p2()); // load a ++ __ sd(x10, at_tos_p2()); // store c in a ++ __ push_reg(x10); // push c ++ // stack: ..., c, b, c, c ++ __ ld(x10, at_tos_p2()); // load b ++ __ sd(x12, at_tos_p2()); // store a in b ++ // stack: ..., c, a, c, c ++ __ sd(x10, at_tos_p1()); // store b in c ++ // stack: ..., c, a, b, c +} + -+//----------------------------------------------------------------------------- -+// Allocation ++void TemplateTable::dup2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos_p1()); // load a ++ __ push_reg(x10); // push a ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ // stack: ..., a, b, a, b ++} + -+void TemplateTable::_new() { -+ transition(vtos, atos); ++void TemplateTable::dup2_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x12, at_tos()); // load c ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ __ push_reg(x12); // push c ++ // stack: ..., a, b, c, b, c ++ __ sd(x12, at_tos_p3()); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ ld(x12, at_tos_p4()); // load a ++ __ sd(x12, at_tos_p2()); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ sd(x10, at_tos_p4()); // store b in a ++ // stack: ..., b, c, a, b, c ++} + -+ __ get_unsigned_2_byte_index_at_bcp(x13, 1); -+ Label slow_case; -+ Label done; -+ Label initialize_header; -+ Label initialize_object; // including clearing the fields ++void TemplateTable::dup2_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ __ ld(x12, at_tos()); // load d ++ __ ld(x10, at_tos_p1()); // load c ++ __ push_reg(x10); // push c ++ __ push_reg(x12); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ ld(x10, at_tos_p4()); // load b ++ __ sd(x10, at_tos_p2()); // store b in d ++ __ sd(x12, at_tos_p4()); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ ld(x12, at_tos_p5()); // load a ++ __ ld(x10, at_tos_p3()); // load c ++ __ sd(x12, at_tos_p3()); // store a in c ++ __ sd(x10, at_tos_p5()); // store c in a ++ // stack: ..., c, d, a, b, c, d ++} + -+ __ get_cpool_and_tags(x14, x10); -+ // Make sure the class we're about to instantiate has been resolved. 
-+ // This is done before loading InstanceKlass to be consistent with the order -+ // how Constant Pool is update (see ConstantPool::klass_at_put) -+ const int tags_offset = Array::base_offset_in_bytes(); -+ __ add(t0, x10, x13); -+ __ la(t0, Address(t0, tags_offset)); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(t0, t0); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t1, t0, (u1)JVM_CONSTANT_Class); -+ __ bnez(t1, slow_case); ++void TemplateTable::swap() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x12, at_tos_p1()); // load a ++ __ ld(x10, at_tos()); // load b ++ __ sd(x12, at_tos()); // store a in b ++ __ sd(x10, at_tos_p1()); // store b in a ++ // stack: ..., b, a ++} + -+ // get InstanceKlass -+ __ load_resolved_klass_at_offset(x14, x13, x14, t0); ++void TemplateTable::iop2(Operation op) ++{ ++ transition(itos, itos); ++ // x10 <== x11 op x10 ++ __ pop_i(x11); ++ switch (op) { ++ case add : __ addw(x10, x11, x10); break; ++ case sub : __ subw(x10, x11, x10); break; ++ case mul : __ mulw(x10, x11, x10); break; ++ case _and : __ andrw(x10, x11, x10); break; ++ case _or : __ orrw(x10, x11, x10); break; ++ case _xor : __ xorrw(x10, x11, x10); break; ++ case shl : __ sllw(x10, x11, x10); break; ++ case shr : __ sraw(x10, x11, x10); break; ++ case ushr : __ srlw(x10, x11, x10); break; ++ default : ShouldNotReachHere(); ++ } ++} + -+ // make sure klass is initialized & doesn't have finalizer -+ // make sure klass is fully initialized -+ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); -+ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); -+ __ bnez(t1, slow_case); ++void TemplateTable::lop2(Operation op) ++{ ++ transition(ltos, ltos); ++ // x10 <== x11 op x10 ++ __ pop_l(x11); ++ switch (op) { ++ case add : __ add(x10, x11, x10); break; ++ case sub : __ sub(x10, x11, x10); break; ++ case mul : __ mul(x10, x11, x10); break; ++ case _and : __ andr(x10, x11, x10); break; ++ case _or : __ orr(x10, x11, x10); break; ++ case _xor : __ xorr(x10, x11, x10); break; ++ default : ShouldNotReachHere(); ++ } ++} + -+ // get instance_size in InstanceKlass (scaled to a count of bytes) -+ __ lwu(x13, Address(x14, Klass::layout_helper_offset())); -+ // test to see if it has a finalizer or is malformed in some way -+ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); -+ __ bnez(t0, slow_case); ++void TemplateTable::idiv() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 idiv x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); ++} + -+ // Allocate the instance: -+ // If TLAB is enabled: -+ // Try to allocate in the TLAB. -+ // If fails, go to the slow path. -+ // Else If inline contiguous allocations are enabled: -+ // Try to allocate in eden. -+ // If fails due to heap end, go to slow path -+ // -+ // If TLAB is enabled OR inline contiguous is enabled: -+ // Initialize the allocation. -+ // Exit. -+ // Go to slow path. 
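
The idiv and irem templates added above test the divisor explicitly because RISC-V integer division does not trap: DIV by zero yields all ones and REM by zero yields the dividend, while Java requires an ArithmeticException; the corrected_idivl helper is assumed to provide the remaining Java result semantics. A standalone sketch of the results the bytecodes must produce (illustrative, not the HotSpot helper):

    #include <climits>
    #include <cstdio>
    #include <stdexcept>

    // Java idiv/irem semantics: zero divisor throws, and INT_MIN / -1 must yield
    // INT_MIN with remainder 0 (guarded here to avoid C++ overflow UB).
    int java_idiv(int a, int b) {
        if (b == 0) throw std::runtime_error("ArithmeticException: / by zero");
        if (a == INT_MIN && b == -1) return INT_MIN;
        return a / b;
    }

    int java_irem(int a, int b) {
        if (b == 0) throw std::runtime_error("ArithmeticException: / by zero");
        if (a == INT_MIN && b == -1) return 0;
        return a % b;
    }

    int main() {
        std::printf("%d %d\n", java_idiv(INT_MIN, -1), java_irem(7, -3)); // -2147483648 1
        return 0;
    }
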
-+ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); ++void TemplateTable::irem() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 irem x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); ++} + -+ if (UseTLAB) { -+ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); ++void TemplateTable::lmul() ++{ ++ transition(ltos, ltos); ++ __ pop_l(x11); ++ __ mul(x10, x10, x11); ++} + -+ if (ZeroTLAB) { -+ // the fields have been already cleared -+ __ j(initialize_header); -+ } else { -+ // initialize both the header and fields -+ __ j(initialize_object); -+ } -+ } else { -+ // Allocation in the shared Eden, if allowed. -+ // -+ // x13: instance size in bytes -+ if (allow_shared_alloc) { -+ __ eden_allocate(x10, x13, 0, x28, slow_case); -+ } -+ } -+ -+ // If USETLAB or allow_shared_alloc are true, the object is created above and -+ // there is an initialized need. Otherwise, skip and go to the slow path. -+ if (UseTLAB || allow_shared_alloc) { -+ // The object is initialized before the header. If the object size is -+ // zero, go directly to the header initialization. -+ __ bind(initialize_object); -+ __ sub(x13, x13, sizeof(oopDesc)); -+ __ beqz(x13, initialize_header); -+ -+ // Initialize obejct fields -+ { -+ __ add(x12, x10, sizeof(oopDesc)); -+ Label loop; -+ __ bind(loop); -+ __ sd(zr, Address(x12)); -+ __ add(x12, x12, BytesPerLong); -+ __ sub(x13, x13, BytesPerLong); -+ __ bnez(x13, loop); -+ } ++void TemplateTable::ldiv() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 ldiv x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); ++} + -+ // initialize object hader only. 
-+ __ bind(initialize_header); -+ if (UseBiasedLocking) { -+ __ ld(t0, Address(x14, Klass::prototype_header_offset())); -+ } else { -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ } -+ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); -+ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops -+ __ store_klass(x10, x14); // store klass last ++void TemplateTable::lrem() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 lrem x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); ++} + -+ { -+ SkipIfEqual skip(_masm, &DTraceAllocProbes, false); -+ // Trigger dtrace event for fastpath -+ __ push(atos); // save the return value -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); -+ __ pop(atos); // restore the return value -+ } -+ __ j(done); -+ } ++void TemplateTable::lshl() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sll(x10, x11, x10); ++} + -+ // slow case -+ __ bind(slow_case); -+ __ get_constant_pool(c_rarg1); -+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); -+ __ verify_oop(x10); ++void TemplateTable::lshr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sra(x10, x11, x10); ++} + -+ // continue -+ __ bind(done); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. -+ __ membar(MacroAssembler::StoreStore); ++void TemplateTable::lushr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ srl(x10, x11, x10); +} + -+void TemplateTable::newarray() { -+ transition(itos, atos); -+ __ load_unsigned_byte(c_rarg1, at_bcp(1)); -+ __ mv(c_rarg2, x10); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), -+ c_rarg1, c_rarg2); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. -+ __ membar(MacroAssembler::StoreStore); ++void TemplateTable::fop2(Operation op) ++{ ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ pop_f(f11); ++ __ fadd_s(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_f(f11); ++ __ fsub_s(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_f(f11); ++ __ fmul_s(f10, f11, f10); ++ break; ++ case div: ++ __ pop_f(f11); ++ __ fdiv_s(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_s(f11, f10); ++ __ pop_f(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+void TemplateTable::anewarray() { -+ transition(itos, atos); -+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); -+ __ get_constant_pool(c_rarg1); -+ __ mv(c_rarg3, x10); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), -+ c_rarg1, c_rarg2, c_rarg3); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. 
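
In the fop2 template above (and dop2 just below), the rem case calls SharedRuntime::frem/drem because the RISC-V F and D extensions have no floating-point remainder instruction; note that Java's % on float/double is the truncating remainder, i.e. C's fmod, not the IEEE-754 remainder. A standalone sketch of the difference:

    #include <cmath>
    #include <cstdio>

    int main() {
        double x = 5.5, y = 3.0;
        // Java's frem/drem result has the sign of the dividend, like fmod.
        std::printf("fmod      = %g\n", std::fmod(x, y));      // 2.5, what Java's % yields
        std::printf("remainder = %g\n", std::remainder(x, y)); // -0.5, IEEE round-to-nearest form
        return 0;
    }
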
-+ __ membar(MacroAssembler::StoreStore); ++void TemplateTable::dop2(Operation op) ++{ ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ pop_d(f11); ++ __ fadd_d(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_d(f11); ++ __ fsub_d(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_d(f11); ++ __ fmul_d(f10, f11, f10); ++ break; ++ case div: ++ __ pop_d(f11); ++ __ fdiv_d(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_d(f11, f10); ++ __ pop_d(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+void TemplateTable::arraylength() { -+ transition(atos, itos); -+ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); -+ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); ++void TemplateTable::ineg() ++{ ++ transition(itos, itos); ++ __ negw(x10, x10); +} + -+void TemplateTable::checkcast() ++void TemplateTable::lneg() +{ -+ transition(atos, atos); -+ Label done, is_null, ok_is_subtype, quicked, resolved; -+ __ beqz(x10, is_null); ++ transition(ltos, ltos); ++ __ neg(x10, x10); ++} + -+ // Get cpool & tags index -+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array -+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index -+ // See if bytecode has already been quicked -+ __ add(t0, x13, Array::base_offset_in_bytes()); -+ __ add(x11, t0, x9); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x11, x11); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); -+ __ beqz(t0, quicked); ++void TemplateTable::fneg() ++{ ++ transition(ftos, ftos); ++ __ fneg_s(f10, f10); ++} + -+ __ push(atos); // save receiver for result, and for GC -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); -+ // vm_result_2 has metadata result -+ __ get_vm_result_2(x10, xthread); -+ __ pop_reg(x13); // restore receiver -+ __ j(resolved); ++void TemplateTable::dneg() ++{ ++ transition(dtos, dtos); ++ __ fneg_d(f10, f10); ++} + -+ // Get superklass in x10 and subklass in x13 -+ __ bind(quicked); -+ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check -+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass ++void TemplateTable::iinc() ++{ ++ transition(vtos, vtos); ++ __ load_signed_byte(x11, at_bcp(2)); // get constant ++ locals_index(x12); ++ __ ld(x10, iaddress(x12, x10, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} + -+ __ bind(resolved); -+ __ load_klass(x9, x13); ++void TemplateTable::wide_iinc() ++{ ++ transition(vtos, vtos); ++ __ lwu(x11, at_bcp(2)); // get constant and index ++ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend ++ __ zero_extend(x12, x11, 16); ++ __ neg(x12, x12); ++ __ slli(x11, x11, 32); ++ __ srai(x11, x11, 48); ++ __ ld(x10, iaddress(x12, t0, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} + -+ // Generate subtype check. Blows x12, x15. Object in x13. -+ // Superklass in x10. Subklass in x9. 
-+ __ gen_subtype_check(x9, ok_is_subtype); ++void TemplateTable::convert() ++{ ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT + -+ // Come here on failure -+ __ push_reg(x13); -+ // object is at TOS -+ __ j(Interpreter::_throw_ClassCastException_entry); ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sign_extend(x10, x10, 32); ++ break; ++ case Bytecodes::_i2f: ++ __ fcvt_s_w(f10, x10); ++ break; ++ case Bytecodes::_i2d: ++ __ fcvt_d_w(f10, x10); ++ break; ++ case Bytecodes::_i2b: ++ __ sign_extend(x10, x10, 8); ++ break; ++ case Bytecodes::_i2c: ++ __ zero_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_i2s: ++ __ sign_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_l2i: ++ __ addw(x10, x10, zr); ++ break; ++ case Bytecodes::_l2f: ++ __ fcvt_s_l(f10, x10); ++ break; ++ case Bytecodes::_l2d: ++ __ fcvt_d_l(f10, x10); ++ break; ++ case Bytecodes::_f2i: ++ __ fcvt_w_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2l: ++ __ fcvt_l_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(f10, f10); ++ break; ++ case Bytecodes::_d2i: ++ __ fcvt_w_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2l: ++ __ fcvt_l_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(f10, f10); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // Come here on success -+ __ bind(ok_is_subtype); -+ __ mv(x10, x13); // Restore object in x13 ++void TemplateTable::lcmp() ++{ ++ transition(ltos, itos); ++ __ pop_l(x11); ++ __ cmp_l2i(t0, x11, x10); ++ __ mv(x10, t0); ++} + -+ // Collect counts on whether this test sees NULLs a lot or not. -+ if (ProfileInterpreter) { -+ __ j(done); -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++void TemplateTable::float_cmp(bool is_float, int unordered_result) ++{ ++ // For instruction feq, flt and fle, the result is 0 if either operand is NaN ++ if (is_float) { ++ __ pop_f(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. 
++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ float_compare(x10, f11, f10, unordered_result); + } else { -+ __ bind(is_null); // same as 'done' ++ __ pop_d(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ double_compare(x10, f11, f10, unordered_result); + } -+ __ bind(done); +} + -+void TemplateTable::instanceof() { -+ transition(atos, itos); -+ Label done, is_null, ok_is_subtype, quicked, resolved; -+ __ beqz(x10, is_null); -+ -+ // Get cpool & tags index -+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array -+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index -+ // See if bytecode has already been quicked -+ __ add(t0, x13, Array::base_offset_in_bytes()); -+ __ add(x11, t0, x9); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x11, x11); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); -+ __ beqz(t0, quicked); -+ -+ __ push(atos); // save receiver for result, and for GC -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); -+ // vm_result_2 has metadata result -+ __ get_vm_result_2(x10, xthread); -+ __ pop_reg(x13); // restore receiver -+ __ verify_oop(x13); -+ __ load_klass(x13, x13); -+ __ j(resolved); -+ -+ // Get superklass in x10 and subklass in x13 -+ __ bind(quicked); -+ __ load_klass(x13, x10); -+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); -+ -+ __ bind(resolved); -+ -+ // Generate subtype check. Blows x12, x15 -+ // Superklass in x10. Subklass in x13. -+ __ gen_subtype_check(x13, ok_is_subtype); ++void TemplateTable::branch(bool is_jsr, bool is_wide) ++{ ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); + -+ // Come here on failure -+ __ mv(x10, zr); -+ __ j(done); -+ // Come here on success -+ __ bind(ok_is_subtype); -+ __ mv(x10, 1); ++ __ profile_taken_branch(x10, x11); ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); + -+ // Collect counts on whether this test sees NULLs a lot or not. -+ if (ProfileInterpreter) { -+ __ j(done); -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++ // load branch displacement ++ if (!is_wide) { ++ __ lhu(x12, at_bcp(1)); ++ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend + } else { -+ __ bind(is_null); // same as 'done' ++ __ lwu(x12, at_bcp(1)); ++ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend + } -+ __ bind(done); -+ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass -+ // x10 = 1: obj != NULL and obj is an instanceof the specified klass -+} + -+//----------------------------------------------------------------------------- -+// Breakpoints -+void TemplateTable::_breakpoint() { -+ // Note: We get here even if we are single stepping.. -+ // jbug inists on setting breakpoints at every bytecode -+ // even if we are in single step mode. 
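
The float_cmp template above takes an unordered_result so that one stub serves both the l and g forms of the compare bytecodes: fcmpl/dcmpl must produce -1 when either operand is NaN, fcmpg/dcmpg must produce +1, and otherwise the result is -1, 0 or 1. A standalone sketch of the expected value (illustrative, not the float_compare macro):

    #include <cmath>
    #include <cstdio>

    // unordered_result is -1 for fcmpl/dcmpl and +1 for fcmpg/dcmpg.
    int fcmp(float a, float b, int unordered_result) {
        if (std::isnan(a) || std::isnan(b)) return unordered_result;
        return (a < b) ? -1 : (a > b) ? 1 : 0;
    }

    int main() {
        std::printf("%d %d %d\n", fcmp(1.0f, NAN, -1), fcmp(1.0f, NAN, +1), fcmp(2.0f, 1.0f, -1)); // -1 1 1
        return 0;
    }
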
++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occurring below. + -+ transition(vtos, vtos); ++ if (is_jsr) { ++ // compute return address as bci ++ __ ld(t1, Address(xmethod, Method::const_offset())); ++ __ add(t1, t1, ++ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); ++ __ sub(x11, xbcp, t1); ++ __ push_i(x11); ++ // Adjust the bcp by the 16-bit displacement in x12 ++ __ add(xbcp, xbcp, x12); ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ // load the next target bytecode into t0, it is the argument of dispatch_only ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ return; ++ } + -+ // get the unpatched byte code -+ __ get_method(c_rarg1); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::get_original_bytecode_at), -+ c_rarg1, xbcp); -+ __ mv(x9, x10); ++ // Normal (non-jsr) branch handling + -+ // post the breakpoint event -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), -+ xmethod, xbcp); ++ // Adjust the bcp by the displacement in x12 ++ __ add(xbcp, xbcp, x12); + -+ // complete the execution of original bytecode -+ __ mv(t0, x9); -+ __ dispatch_only_normal(vtos); -+} ++ assert(UseLoopCounter || !UseOnStackReplacement, ++ "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // x10: MDO ++ // x11: MDO bumped taken-count ++ // x12: target offset ++ __ bgtz(x12, dispatch); // count only if backward branch + -+//----------------------------------------------------------------------------- -+// Exceptions -+ -+void TemplateTable::athrow() { -+ transition(atos, vtos); -+ __ null_check(x10); -+ __ j(Interpreter::throw_exception_entry()); -+} ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ bnez(t0, has_counters); ++ __ push_reg(x10); ++ __ push_reg(x11); ++ __ push_reg(x12); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), xmethod); ++ __ pop_reg(x12); ++ __ pop_reg(x11); ++ __ pop_reg(x10); ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory ++ __ bind(has_counters); + -+//----------------------------------------------------------------------------- -+// Synchronization -+// -+// Note: monitorenter & exit are symmetric routines; which is reflected -+// in the assembly code structure as well -+// -+// Stack layout: -+// -+// [expressions ] <--- esp = expression stack top -+// .. -+// [expressions ] -+// [monitor entry] <--- monitor block top = expression stack bot -+// .. -+// [monitor entry] -+// [frame data ] <--- monitor block bot -+// ... -+// [saved fp ] <--- fp -+void TemplateTable::monitorenter() -+{ -+ transition(atos, vtos); ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ bind(dispatch); ++ } + -+ // check for NULL object -+ __ null_check(x10); ++ // Pre-load the next target bytecode into t0 ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); + -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ // continue with the bytecode @ target ++ // t0: target bytecode ++ // xbcp: target bcp ++ __ dispatch_only(vtos, /*generate_poll*/true); + -+ Label allocated; ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode + -+ // initialize entry pointer -+ __ mv(c_rarg1, zr); // points to free slot or NULL ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); + -+ // find a free slot in the monitor block (result in c_rarg1) -+ { -+ Label entry, loop, exit, notUsed; -+ __ ld(c_rarg3, monitor_block_top); // points to current entry, -+ // starting with top-most entry -+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
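
The backedge profiling above relies on increment_mask_and_jump: bump the counter by a fixed increment, mask the updated value, and take the overflow path (here backedge_counter_overflow, which requests OSR compilation) when the masked value becomes zero. A standalone model of that pattern; the constants are made up for the demo, not HotSpot's defaults:

    #include <cstdint>
    #include <cstdio>

    // Increment-and-mask test: returns true when the overflow path should be taken.
    static bool bump_and_check(uint32_t& counter, uint32_t increment, uint32_t mask) {
        counter += increment;
        return (counter & mask) == 0;
    }

    int main() {
        uint32_t counter = 0;
        const uint32_t increment = 8, mask = 0x3f8; // placeholder values
        int backedges = 1;
        while (!bump_and_check(counter, increment, mask)) {
            ++backedges;
        }
        std::printf("overflow after %d backedges\n", backedges); // 128 with these constants
        return 0;
    }
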
+ -+ __ j(entry); ++ __ mv(x9, x10); // save the nmethod + -+ __ bind(loop); -+ // check if current entry is used -+ // if not used then remember entry in c_rarg1 -+ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); -+ __ bnez(t0, notUsed); -+ __ mv(c_rarg1, c_rarg3); -+ __ bind(notUsed); -+ // check if current entry is for same object -+ // if same object then stop searching -+ __ beq(x10, t0, exit); -+ // otherwise advance to next entry -+ __ add(c_rarg3, c_rarg3, entry_size); -+ __ bind(entry); -+ // check if bottom reached -+ // if not at bottom then check this entry -+ __ bne(c_rarg3, c_rarg2, loop); -+ __ bind(exit); -+ } ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + -+ __ bnez(c_rarg1, allocated); // check if a slot has been found and -+ // if found, continue with that on ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + -+ // allocate one if there's no free slot -+ { -+ Label entry, loop; -+ // 1. compute new pointers // esp: old expression stack top -+ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom -+ __ sub(esp, esp, entry_size); // move expression stack top -+ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom -+ __ mv(c_rarg3, esp); // set start value for copy loop -+ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom -+ __ sub(sp, sp, entry_size); // make room for the monitor ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + -+ __ j(entry); -+ // 2. move expression stack contents -+ __ bind(loop); -+ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack -+ // word from old location -+ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location -+ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word -+ __ bind(entry); -+ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom -+ // then copy next word -+ } ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } ++} + -+ // call run-time routine -+ // c_rarg1: points to monitor entry -+ __ bind(allocated); ++void TemplateTable::if_0cmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; + -+ // Increment bcp to point to the next bytecode, so exception -+ // handling for async. exceptions work correctly. -+ // The object has already been poped from the stack, so the -+ // expression stack looks correct. 
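
The if_0cmp and if_icmp templates above normalize the int on top of stack with addw x10, x10, zr: on RV64, addw performs a 32-bit add and sign-extends the result into the 64-bit register, so the full-width branch compares that follow see a canonical value (the same idiom appears in iaload and l2i). Equivalent C++ for the sign extension, as an illustration:

    #include <cstdint>
    #include <cstdio>

    // addw rd, rs, zr: keep the low 32 bits and sign-extend them to 64 bits.
    int64_t sign_extend_w(int64_t x) {
        return (int64_t)(int32_t) x;
    }

    int main() {
        std::printf("%lld\n", (long long) sign_extend_w(0xFFFFFFFFLL)); // -1
        return 0;
    }
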
-+ __ addi(xbcp, xbcp, 1); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bnez(x10, not_taken); ++ break; ++ case not_equal: ++ __ beqz(x10, not_taken); ++ break; ++ case less: ++ __ bgez(x10, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(x10, not_taken); ++ break; ++ case greater: ++ __ blez(x10, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(x10, not_taken); ++ break; ++ default: ++ break; ++ } + -+ // store object -+ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ __ lock_object(c_rarg1); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ // check to make sure this monitor doesn't cause stack overflow after locking -+ __ save_bcp(); // in case of exception -+ __ generate_stack_overflow_check(0); ++void TemplateTable::if_icmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_i(x11); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bne(x11, x10, not_taken); ++ break; ++ case not_equal: ++ __ beq(x11, x10, not_taken); ++ break; ++ case less: ++ __ bge(x11, x10, not_taken); ++ break; ++ case less_equal: ++ __ bgt(x11, x10, not_taken); ++ break; ++ case greater: ++ __ ble(x11, x10, not_taken); ++ break; ++ case greater_equal: ++ __ blt(x11, x10, not_taken); ++ break; ++ default: ++ break; ++ } + -+ // The bcp has already been incremented. Just need to dispatch to -+ // next instruction. -+ __ dispatch_next(vtos); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); +} + -+void TemplateTable::monitorexit() ++void TemplateTable::if_nullcmp(Condition cc) +{ + transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ if (cc == equal) { ++ __ bnez(x10, not_taken); ++ } else { ++ __ beqz(x10, not_taken); ++ } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ // check for NULL object -+ __ null_check(x10); -+ -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ -+ Label found; -+ -+ // find matching slot -+ { -+ Label entry, loop; -+ __ ld(c_rarg1, monitor_block_top); // points to current entry, -+ // starting with top-most entry -+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom -+ // of monitor block -+ __ j(entry); ++void TemplateTable::if_acmp(Condition cc) ++{ ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_ptr(x11); + -+ __ bind(loop); -+ // check if current entry is for same object -+ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ // if same object then stop searching -+ __ beq(x10, t0, found); -+ // otherwise advance to next entry -+ __ add(c_rarg1, c_rarg1, entry_size); -+ __ bind(entry); -+ // check if bottom reached -+ // if not at bottom then check this entry -+ __ bne(c_rarg1, c_rarg2, loop); ++ if (cc == equal) { ++ __ bne(x11, x10, not_taken); ++ } else if (cc == not_equal) { ++ __ beq(x11, x10, not_taken); + } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ // error handling. 
Unlocking was not block-structured -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ __ should_not_reach_here(); ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); + -+ // call run-time routine -+ __ bind(found); -+ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) -+ __ unlock_object(c_rarg1); -+ __ pop_ptr(x10); // discard object ++ locals_index(x11); ++ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + -+// Wide instructions -+void TemplateTable::wide() -+{ -+ __ load_unsigned_byte(x9, at_bcp(1)); -+ __ mv(t0, (address)Interpreter::_wentry_point); -+ __ shadd(t0, x9, t0, t1, 3); -+ __ ld(t0, Address(t0)); -+ __ jr(t0); ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ locals_index_wide(x11); ++ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + -+// Multi arrays -+void TemplateTable::multianewarray() { -+ transition(vtos, atos); -+ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions -+ // last dim is on top of stack; we want address of first one: -+ // first_addr = last_addr + (ndims - 1) * wordSize -+ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); -+ __ sub(c_rarg1, c_rarg1, wordSize); -+ call_VM(x10, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), -+ c_rarg1); -+ __ load_unsigned_byte(x11, at_bcp(3)); -+ __ shadd(esp, x11, esp, t0, 3); ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ // align xbcp ++ __ la(x11, at_bcp(BytesPerInt)); ++ __ andi(x11, x11, -BytesPerInt); ++ // load lo & hi ++ __ lwu(x12, Address(x11, BytesPerInt)); ++ __ lwu(x13, Address(x11, 2 * BytesPerInt)); ++ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ // check against lo & hi ++ __ blt(x10, x12, default_case); ++ __ bgt(x10, x13, default_case); ++ // lookup dispatch offset ++ __ subw(x10, x10, x12); ++ __ shadd(x13, x10, x11, t0, 2); ++ __ lwu(x13, Address(x13, 3 * BytesPerInt)); ++ __ profile_switch_case(x10, x11, x12); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ load_unsigned_byte(t0, Address(xbcp)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x11, 0)); ++ __ j(continue_execution); +} -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp -new file mode 100644 -index 000000000..b437c8f4c ---- /dev/null -+++ 
b/src/hotspot/cpu/riscv/templateTable_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP -+#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} + -+static void prepare_invoke(int byte_no, -+ Register method, // linked method (or i-klass) -+ Register index = noreg, // itable index, MethodType, etc. -+ Register recv = noreg, // if caller wants to see it -+ Register flags = noreg // if caller wants to test it -+ ); -+static void invokevirtual_helper(Register index, Register recv, -+ Register flags); ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ // bswap x10 so we can avoid bswapping the table entries ++ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend ++ // align xbcp ++ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of ++ // this instruction (change offsets ++ // below) ++ __ andi(x9, x9, -BytesPerInt); ++ // set counter ++ __ lwu(x11, Address(x9, BytesPerInt)); ++ __ revb_w(x11, x11); ++ __ j(loop_entry); ++ // table search ++ __ bind(loop); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lw(t0, Address(t0, 2 * BytesPerInt)); ++ __ beq(x10, t0, found); ++ __ bind(loop_entry); ++ __ addi(x11, x11, -1); ++ __ bgez(x11, loop); ++ // default case ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x9, 0)); ++ __ j(continue_execution); ++ // entry found -> get offset ++ __ bind(found); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lwu(x13, Address(t0, 3 * BytesPerInt)); ++ __ profile_switch_case(x11, x10, x9); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ lbu(t0, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++} + -+// Helpers -+static void index_check(Register array, Register index); ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) ++ // binary_search start: ++ // #Binary search according to "Methodik des Programmierens" by ++ // 
# Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i + 1 < j) do ++ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // # with Q: for all i: 0 <= i < n: key < a[i] ++ // # where a stands for the array and assuming that the (inexisting) ++ // # element a[n] is infinitely big. ++ // int h = (i + j) >> 1 ++ // # i < h < j ++ // if (key < array[h].fast_match()) ++ // then [j = h] ++ // else [i = h] ++ // end ++ // # R: a[i] <= key < a[i+1] or Q ++ // # (i.e., if key is within array, i is the correct index) ++ // return i ++ // binary_search end + -+#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -new file mode 100644 -index 000000000..03079aec0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP -+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP ++ // Register allocation ++ const Register key = x10; // already set (tosca) ++ const Register array = x11; ++ const Register i = x12; ++ const Register j = x13; ++ const Register h = x14; ++ const Register temp = x15; + -+// These are the CPU-specific fields, types and integer -+// constants required by the Serviceability Agent. This file is -+// referenced by vmStructs.cpp. 
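For reference, the invariant-based search described in the comment above can be written out in plain C++. This is only an illustration of what fast_binaryswitch implements in assembly; the LookupswitchPair layout (a 32-bit match value followed by a 32-bit branch offset, both already in native byte order) is an assumption of the sketch, not the exact HotSpot type.

#include <cstdint>

struct LookupswitchPair {          // assumed layout: entries sorted by 'match'
  int32_t match;                   // the case value
  int32_t offset;                  // branch offset relative to the lookupswitch bcp
};

// Returns i such that a[i].match <= key < a[i+1].match, or 0 when key is below
// every entry; the caller re-checks a[i].match == key, exactly as the generated
// code re-compares before taking the matched offset.
static int binary_search(int32_t key, const LookupswitchPair* a, int n) {
  int i = 0;
  int j = n;
  while (i + 1 < j) {
    const int h = (i + j) >> 1;    // i < h < j
    if (key < a[h].match) {
      j = h;                       // key < a[h]  => keep searching the lower half
    } else {
      i = h;                       // a[h] <= key => keep searching the upper half
    }
  }
  return i;
}

Note that the generated loop keeps the table in class-file (big-endian) order and byte-swaps only the probed entry, which is why revb_w_w appears inside the loop rather than once up front.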
++ // Find array start ++ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to ++ // get rid of this ++ // instruction (change ++ // offsets below) ++ __ andi(array, array, -BytesPerInt); + -+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ // Initialize i & j ++ __ mv(i, zr); // i = 0 ++ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) + -+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ // Convert j into native byteordering ++ __ revb_w(j, j); + -+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ // And start ++ Label entry; ++ __ j(entry); + -+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ __ addw(h, i, j); // h = i + j ++ __ srliw(h, h, 1); // h = (i + j) >> 1 ++ // if [key < array[h].fast_match()] ++ // then [j = h] ++ // else [i = h] ++ // Convert array[h].match to native byte-ordering before compare ++ __ shadd(temp, h, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend + -+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -new file mode 100644 -index 000000000..dd4f5c9ae ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
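The repeated revb_w_w / revb_w comments above all come down to the same issue: tableswitch and lookupswitch operands are stored big-endian in the class file while RISC-V is little-endian. A hedged C++ equivalent of "reverse the bytes of a 32-bit word, then sign-extend" (the helper name only mirrors the assembler routine; it is otherwise illustrative):

#include <cstdint>

// Rough C++ equivalent of the revb_w_w helper used above: byte-swap the low
// 32 bits of a value and sign-extend the result to 64 bits.
static int64_t reverse_word_and_sign_extend(uint32_t v) {
  const uint32_t swapped = ((v & 0x000000FFu) << 24) |
                           ((v & 0x0000FF00u) <<  8) |
                           ((v & 0x00FF0000u) >>  8) |
                           ((v & 0xFF000000u) >> 24);
  return (int64_t)(int32_t)swapped;  // the table offset is a signed 32-bit quantity
}

lwu loads the raw word zero-extended; the swap restores native order before the value is used as a signed branch offset added to xbcp.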
-+ * -+ */ ++ Label L_done, L_greater; ++ __ bge(key, temp, L_greater); ++ // if [key < array[h].fast_match()] then j = h ++ __ mv(j, h); ++ __ j(L_done); ++ __ bind(L_greater); ++ // if [key >= array[h].fast_match()] then i = h ++ __ mv(i, h); ++ __ bind(L_done); + -+#include "memory/allocation.hpp" -+#include "memory/allocation.inline.hpp" -+#include "runtime/os.inline.hpp" -+#include "vm_version_ext_riscv.hpp" ++ // while [i + 1 < j] ++ __ bind(entry); ++ __ addiw(h, i, 1); // i + 1 ++ __ blt(h, j, loop); // i + 1 < j ++ } + -+// VM_Version_Ext statics -+int VM_Version_Ext::_no_of_threads = 0; -+int VM_Version_Ext::_no_of_cores = 0; -+int VM_Version_Ext::_no_of_sockets = 0; -+bool VM_Version_Ext::_initialized = false; -+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; -+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ // end of binary search, result index is i (must check again!) ++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ shadd(temp, i, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++ __ bne(key, temp, default_case); + -+void VM_Version_Ext::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } ++ // entry found -> j = offset ++ __ shadd(temp, i, array, temp, 3); ++ __ lwu(j, Address(temp, BytesPerInt)); ++ __ profile_switch_case(i, key, array); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + -+ int core_id = -1; -+ int chip_id = -1; -+ int len = 0; -+ char* src_string = NULL; ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); + -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); + -+int VM_Version_Ext::number_of_threads(void) { -+ initialize_cpu_information(); -+ return _no_of_threads; -+} ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lwu(j, Address(array, -2 * BytesPerInt)); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + -+int VM_Version_Ext::number_of_cores(void) { -+ initialize_cpu_information(); -+ return _no_of_cores; -+} ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); + -+int VM_Version_Ext::number_of_sockets(void) { -+ initialize_cpu_information(); -+ return _no_of_sockets; ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); +} + -+const char* VM_Version_Ext::cpu_name(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); -+ return tmp; -+} ++void TemplateTable::_return(TosState state) ++{ ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation + -+const char* VM_Version_Ext::cpu_description(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; ++ 
if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ ++ __ ld(c_rarg1, aaddress(0)); ++ __ load_klass(x13, c_rarg1); ++ __ lwu(x13, Address(x13, Klass::access_flags_offset())); ++ Label skip_register_finalizer; ++ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); ++ __ beqz(t0, skip_register_finalizer); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); ++ ++ __ bind(skip_register_finalizer); + } -+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); -+ return tmp; -+} -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -new file mode 100644 -index 000000000..0982b6668 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++ // Issue a StoreStore barrier after all stores but before return ++ // from any constructor for any class with a final field. We don't ++ // know if this is a finalizer, so we always do so. ++ if (_desc->bytecode() == Bytecodes::_return) { ++ __ membar(MacroAssembler::StoreStore); ++ } + -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(x10); ++ } + -+class VM_Version_Ext : public VM_Version { -+ private: -+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; -+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ __ remove_activation(state); ++ __ ret(); ++} + -+ static int _no_of_threads; -+ static int _no_of_cores; -+ static int _no_of_sockets; -+ static bool _initialized; -+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; -+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + -+ public: -+ static int number_of_threads(void); -+ static int number_of_cores(void); -+ static int number_of_sockets(void); ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. 
Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. + -+ static const char* cpu_name(void); -+ static const char* cpu_description(void); -+ static void initialize_cpu_information(void); ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ const Register temp = x9; ++ assert_different_registers(Rcache, index, temp); + -+}; ++ Label resolved, clinit_barrier_slow; + -+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -new file mode 100644 -index 000000000..31d5bb5f4 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,190 @@ -+/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
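Summarising the rules above, the interpreter's barrier placement for volatile fields can be sketched as follows. This is illustrative only; membar() and the bit constants stand in for the MacroAssembler barrier interface used in the generated code, not a real fence implementation.

enum MembarBits { LoadLoad = 1, LoadStore = 2, StoreLoad = 4, StoreStore = 8 };

static void membar(int /*bits*/) { /* a hardware fence is emitted here in the real code */ }

static void volatile_field_load(/* ... */) {
  // ... load the field ...
  membar(LoadLoad | LoadStore);    // later accesses may not float above the load
}

static void volatile_field_store(/* ... */) {
  membar(StoreStore | LoadStore);  // earlier accesses may not float below the store
  // ... store the field ...
  membar(StoreLoad | StoreStore);  // orders this store against a following volatile load
}

The StoreLoad after a volatile store is the expensive one; it covers the volatile-store-then-volatile-load case singled out at the end of the comment above.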
-+ * -+ */ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } + -+#include "precompiled.hpp" -+#include "runtime/java.hpp" -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" -+#include "utilities/formatBuffer.hpp" ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ __ mv(t0, (int) code); ++ __ beq(temp, t0, resolved); + -+#include OS_HEADER_INLINE(os) ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); + -+const char* VM_Version::_uarch = ""; -+uint32_t VM_Version::_initial_vector_length = 0; ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ __ mv(temp, (int) code); ++ __ call_VM(noreg, entry, temp); + -+void VM_Version::initialize() { -+ get_os_cpu_info(); ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset ++ // so all clients ofthis method must be modified accordingly ++ __ bind(resolved); + -+ if (FLAG_IS_DEFAULT(UseFMA)) { -+ FLAG_SET_DEFAULT(UseFMA, true); -+ } -+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { -+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); + } ++} + -+ if (UseAES || UseAESIntrinsics) { -+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { -+ warning("AES instructions are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAES, false); -+ } -+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { -+ warning("AES intrinsics are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAESIntrinsics, false); -+ } -+ } ++// The Rcache and index registers must be set before call ++// n.b unlike x86 cache already includes the index offset ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); + -+ if (UseAESCTRIntrinsics) { -+ warning("AES/CTR intrinsics are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); -+ } ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ ld(off, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f2_offset()))); ++ // Flags ++ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); + -+ if (UseSHA) { -+ warning("SHA instructions are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseSHA, false); ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f1_offset()))); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ __ resolve_oop_handle(obj); + } ++} + -+ if (UseSHA1Intrinsics) { -+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); -+ 
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); -+ } ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = t1; ++ const Register index = x14; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ // determine constant pool cache field offsets ++ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); ++ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + ++ (is_invokevirtual ? ++ ConstantPoolCacheEntry::f2_offset() : ++ ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); + -+ if (UseSHA256Intrinsics) { -+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); -+ } ++ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ __ ld(method, Address(cache, method_offset)); + -+ if (UseSHA512Intrinsics) { -+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ if (itable_index != noreg) { ++ __ ld(itable_index, Address(cache, index_offset)); + } ++ __ lwu(flags, Address(cache, flags_offset)); ++} + -+ if (UseCRC32Intrinsics) { -+ warning("CRC32Intrinsics instructions are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); -+ } ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here beacause we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
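The JVMTI posts in this file are all gated the same way: load a global watch counter and take the VM call only when an agent has actually armed a watch, so the common case costs one load and a branch. A minimal sketch of that pattern (the counter and slow-path names are placeholders, not the real JvmtiExport interface):

#include <atomic>

// Stand-in for the counter behind JvmtiExport::get_field_access_count_addr().
static std::atomic<int> g_field_access_watch_count{0};

static void post_field_access_to_vm() { /* slow path: transition into the runtime */ }

// Fast path executed for every interpreted field access: a single load of the
// counter, and the expensive VM call only when a watch is actually armed.
static inline void maybe_post_field_access() {
  if (g_field_access_watch_count.load(std::memory_order_relaxed) != 0) {
    post_field_access_to_vm();
  }
}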
++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); + -+ if (UseCRC32CIntrinsics) { -+ warning("CRC32CIntrinsics instructions are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); -+ } ++ __ beqz(x10, L1); + -+ if (UseRVV) { -+ if (!(_features & CPU_V)) { -+ warning("RVV is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVV, false); ++ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); ++ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); ++ ++ if (is_static) { ++ __ mv(c_rarg1, zr); // NULL object reference + } else { -+ // read vector length from vector CSR vlenb -+ _initial_vector_length = get_current_vector_length(); ++ __ ld(c_rarg1, at_tos()); // get object pointer without popping it ++ __ verify_oop(c_rarg1); + } ++ // c_rarg1: object pointer or NULL ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); + } ++} + -+ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { -+ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); -+ } ++void TemplateTable::pop_and_check_object(Register r) ++{ ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} + -+ if (UseZbb) { -+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { -+ FLAG_SET_DEFAULT(UsePopCountInstruction, true); -+ } -+ } else { -+ FLAG_SET_DEFAULT(UsePopCountInstruction, false); ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) ++{ ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x14; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register raw_flags = x16; ++ const Register bc = x14; // uses same reg as obj, so don't mix them ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); ++ ++ if (!is_static) { ++ // obj is on the stack ++ pop_and_check_object(obj); + } + -+ char buf[512]; -+ buf[0] = '\0'; -+ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); -+ strcat(buf, "rv64"); -+#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); -+ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) -+#undef ADD_FEATURE_IF_SUPPORTED ++ __ add(off, obj, off); ++ const Address field(off); + -+ _features_string = os::strdup(buf); ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; + -+#ifdef COMPILER2 -+ initialize_c2(); -+#endif // COMPILER2 -+} ++ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + -+#ifdef COMPILER2 -+void VM_Version::initialize_c2() { -+ // lack of cmove in riscv -+ if (UseCMoveUnconditionally) { -+ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); -+ } -+ if (ConditionalMoveLimit > 0) { -+ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); ++ ++ // Dont't rewrite getstatic, only getfield ++ if 
(is_static) { ++ rc = may_not_rewrite; + } + -+ if (!UseRVV) { -+ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ __ push(btos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); + } ++ __ j(Done); + -+ if (!UseRVV && MaxVectorSize) { -+ FLAG_SET_DEFAULT(MaxVectorSize, 0); -+ } ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); + -+ if (UseRVV) { -+ if (FLAG_IS_DEFAULT(MaxVectorSize)) { -+ MaxVectorSize = _initial_vector_length; -+ } else if (MaxVectorSize < 16) { -+ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); -+ UseRVV = false; -+ } else if (is_power_of_2(MaxVectorSize)) { -+ if (MaxVectorSize > _initial_vector_length) { -+ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d", -+ _initial_vector_length, _initial_vector_length); -+ } -+ MaxVectorSize = _initial_vector_length; -+ } else { -+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); -+ } ++ // ztos (same code as btos) ++ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ztos); ++ // Rewirte bytecode to be faster ++ if (rc == may_rewrite) { ++ // uses btos rewriting, no truncating to t/f bit is needed for getfield ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); + } ++ __ j(Done); + -+ // disable prefetch -+ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { -+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); ++ // atos ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ push(atos); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ __ push(itos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ctos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ __ push(stos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ltos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lgetfield, 
bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { -+ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); + } -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -new file mode 100644 -index 000000000..0178e6d75 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,65 @@ -+/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
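The slli/srli pair used in getfield_or_static above is the standard RV64 idiom for extracting an unsigned bitfield: shift the field up to the top of the register, then shift it back down logically. A small C++ illustration with assumed shift and width values (the real constants are defined by ConstantPoolCacheEntry):

#include <cstdint>

// Assumed field position for illustration; the real values come from
// ConstantPoolCacheEntry::tos_state_shift / tos_state_bits.
static const int XLEN            = 64;
static const int tos_state_shift = 28;
static const int tos_state_bits  = 4;

// Mirrors:  slli(flags, raw_flags, XLEN - (shift + bits));
//           srli(flags, flags,     XLEN - bits);
static inline uint64_t extract_tos_state(uint64_t raw_flags) {
  const uint64_t up = raw_flags << (XLEN - (tos_state_shift + tos_state_bits));
  return up >> (XLEN - tos_state_bits);   // == (raw_flags >> shift) & ((1 << bits) - 1)
}

The case chain that follows relies on btos being 0, so the first test is a plain bnez and each later case subtracts the next tos value before testing for zero.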
-+ * -+ */ ++ __ j(Done); + -+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_RISCV_HPP ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(dtos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); ++ } ++#ifdef ASSERT ++ __ j(Done); + -+#include "runtime/abstract_vm_version.hpp" -+#include "runtime/globals_extension.hpp" -+#include "utilities/sizes.hpp" ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif + -+class VM_Version : public Abstract_VM_Version { -+public: -+ // Initialization -+ static void initialize(); ++ __ bind(Done); + -+ enum Feature_Flag { -+#define CPU_FEATURE_FLAGS(decl) \ -+ decl(I, "i", 8) \ -+ decl(M, "m", 12) \ -+ decl(A, "a", 0) \ -+ decl(F, "f", 5) \ -+ decl(D, "d", 3) \ -+ decl(C, "c", 2) \ -+ decl(V, "v", 21) ++ Label notVolatile; ++ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++} + -+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), -+ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) -+#undef DECLARE_CPU_FEATURE_FLAG -+ }; ++void TemplateTable::getfield(int byte_no) ++{ ++ getfield_or_static(byte_no, false); ++} + -+protected: -+ static const char* _uarch; -+ static uint32_t _initial_vector_length; -+ static void get_os_cpu_info(); -+ static uint32_t get_current_vector_length(); ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} + -+#ifdef COMPILER2 -+private: -+ static void initialize_c2(); -+#endif // COMPILER2 -+}; ++void TemplateTable::getstatic(int byte_no) ++{ ++ getfield_or_static(byte_no, true); ++} + -+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -new file mode 100644 -index 000000000..6572d9334 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// The registers cache and index expected to be set before call. 
++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "code/vmreg.hpp" ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + -+void VMRegImpl::set_regName() { -+ Register reg = ::as_Register(0); -+ int i = 0; -+ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { -+ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = reg->name(); -+ } -+ reg = reg->successor(); -+ } ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); ++ __ beqz(x10, L1); + -+ FloatRegister freg = ::as_FloatRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = freg->name(); -+ } -+ freg = freg->successor(); -+ } ++ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = vreg->name(); ++ if (is_static) { ++ // Life is simple. Null out the object pointer. ++ __ mv(c_rarg1, zr); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ __ lwu(c_rarg3, Address(c_rarg2, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ Label nope2, done, ok; ++ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue ++ __ sub(t0, c_rarg3, ltos); ++ __ beqz(t0, ok); ++ __ sub(t0, c_rarg3, dtos); ++ __ bnez(t0, nope2); ++ __ bind(ok); ++ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); ++ __ bind(nope2); + } -+ vreg = vreg->successor(); -+ } -+ -+ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; ++ // cache entry pointer ++ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); ++ // object (tos) ++ __ mv(c_rarg3, esp); ++ // c_rarg1: object pointer set up above (NULL if static) ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); + } +} -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -new file mode 100644 -index 000000000..ec76a1db1 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
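jvmti_post_field_mod above has to find the receiver underneath a value whose size it does not know in advance: long and double occupy two expression-stack slots, everything else one, which is what the ltos/dtos comparison decides. A hedged sketch of that calculation, assuming a word-sized, downward-growing expression stack as implied by the at_tos_p1()/at_tos_p2() addressing:

#include <cstdint>

enum TosState { btos, ztos, ctos, stos, itos, ltos, ftos, dtos, atos, vtos };

// The value to store sits on top of the expression stack with the receiver
// below it; long and double take two slots, everything else one.
static inline intptr_t* receiver_slot(intptr_t* esp, TosState tos) {
  const int value_slots = (tos == ltos || tos == dtos) ? 2 : 1;
  return esp + value_slots;        // at_tos_p1() or at_tos_p2() in the code above
}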
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VMREG_RISCV_HPP -+#define CPU_RISCV_VMREG_RISCV_HPP ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); + -+inline bool is_Register() { -+ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; -+} ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x12; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register bc = x14; + -+inline bool is_FloatRegister() { -+ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; -+} ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} ++ Label Done; ++ __ mv(x15, flags); + -+inline Register as_Register() { -+ assert( is_Register(), "must be"); -+ return ::as_Register(value() / RegisterImpl::max_slots_per_register); -+} ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister() && is_even(value()), "must be" ); -+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / -+ FloatRegisterImpl::max_slots_per_register); -+} ++ Label notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; + -+inline VectorRegister as_VectorRegister() { -+ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); -+} ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + -+inline bool is_concrete() { -+ assert(is_reg(), "must be"); -+ return is_even(value()); -+} ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); + -+#endif // CPU_RISCV_VMREG_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -new file mode 100644 -index 
000000000..9605e59f4 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Don't rewrite putstatic, only putfield ++ if (is_static) { ++ rc = may_not_rewrite; ++ } + -+#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -+#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++ // btos ++ { ++ __ pop(btos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); // off register as temparator register. ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+inline VMReg RegisterImpl::as_VMReg() { -+ if( this == noreg ) { -+ return VMRegImpl::Bad(); ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); ++ ++ // ztos ++ { ++ __ pop(ztos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } -+ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); -+} + -+inline VMReg FloatRegisterImpl::as_VMReg() { -+ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + -+ ConcreteRegisterImpl::max_gpr); -+} ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); + -+inline VMReg VectorRegisterImpl::as_VMReg() { -+ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + -+ ConcreteRegisterImpl::max_fpr); -+} ++ // atos ++ { ++ __ pop(atos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ // Store into the field ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp -new file mode 100644 -index 000000000..b2aa87ab8 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp -@@ -0,0 +1,260 @@ -+/* -+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "assembler_riscv.inline.hpp" -+#include "code/vtableStubs.hpp" -+#include "interp_masm_riscv.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "oops/instanceKlass.hpp" -+#include "oops/klassVtable.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif ++ // itos ++ { ++ __ pop(itos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+// machine-dependent part of VtableStubs: create VtableStub of correct size and -+// initialize its code ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); + -+#define __ masm-> ++ // ctos ++ { ++ __ pop(ctos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+#ifndef PRODUCT -+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); -+#endif ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); + -+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { -+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. -+ const int stub_code_length = code_size_limit(true); -+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); -+ // Can be NULL if there is no free space in the code cache. -+ if (s == NULL) { -+ return NULL; ++ // stos ++ { ++ __ pop(stos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } + -+ // Count unused bytes in instruction sequences of variable size. -+ // We add them to the computed buffer size in order to avoid -+ // overflow in subsequently generated stubs. -+ address start_pc = NULL; -+ int slop_bytes = 0; -+ int slop_delta = 0; -+ -+ ResourceMark rm; -+ CodeBuffer cb(s->entry_point(), stub_code_length); -+ MacroAssembler* masm = new MacroAssembler(&cb); -+ assert_cond(masm != NULL); ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); + -+#if (!defined(PRODUCT) && defined(COMPILER2)) -+ if (CountCompiledCalls) { -+ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(t2)); ++ // ltos ++ { ++ __ pop(ltos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } -+#endif + -+ // get receiver (need to skip return address on top of stack) -+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); + -+ // get receiver klass -+ address npe_addr = __ pc(); -+ __ load_klass(t2, j_rarg0); ++ // ftos ++ { ++ __ pop(ftos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+#ifndef PRODUCT -+ if (DebugVtables) { -+ Label L; -+ start_pc = __ pc(); ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif + -+ // check offset vs vtable length -+ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); -+ __ mvw(t1, vtable_index * vtableEntry::size()); -+ __ bgt(t0, t1, L); -+ __ enter(); -+ __ mv(x12, vtable_index); ++ // dtos ++ { ++ __ pop(dtos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); ++ } ++ } + -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); -+ const ptrdiff_t estimate = 256; -+ const ptrdiff_t codesize = __ pc() - start_pc; -+ slop_delta = estimate - codesize; // call_VM varies in length, depending on data -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++#ifdef ASSERT ++ __ j(Done); + -+ __ leave(); -+ __ bind(L); -+ } -+#endif // PRODUCT ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif + -+ start_pc = __ pc(); -+ __ lookup_virtual_method(t2, vtable_index, xmethod); -+ // lookup_virtual_method generates -+ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld -+ // 1 instruction (best case):ld * 1 -+ slop_delta = 16 - (int)(__ pc() - start_pc); -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ bind(Done); + -+#ifndef PRODUCT -+ if (DebugVtables) { -+ Label L; -+ __ beqz(xmethod, L); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ bnez(t0, L); -+ __ stop("Vtable entry is NULL"); -+ __ bind(L); ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); + } -+#endif // PRODUCT ++} + -+ // x10: receiver klass -+ // xmethod: Method* -+ // x12: receiver -+ address ame_addr = __ pc(); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ jr(t0); ++void TemplateTable::putfield(int byte_no) ++{ ++ putfield_or_static(byte_no, false); ++} + -+ masm->flush(); -+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} + -+ return s; ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); +} + -+VtableStub* VtableStubs::create_itable_stub(int itable_index) { -+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. -+ const int stub_code_length = code_size_limit(false); -+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); -+ // Can be NULL if there is no free space in the code cache. 
-+ if (s == NULL) { -+ return NULL; -+ } -+ // Count unused bytes in instruction sequences of variable size. -+ // We add them to the computed buffer size in order to avoid -+ // overflow in subsequently generated stubs. -+ address start_pc = NULL; -+ int slop_bytes = 0; -+ int slop_delta = 0; ++void TemplateTable::jvmti_post_fast_field_mod() ++{ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(c_rarg3, Address(t0, offset)); ++ __ beqz(c_rarg3, L2); ++ __ pop_ptr(x9); // copy the object pointer from tos ++ __ verify_oop(x9); ++ __ push_ptr(x9); // put the object pointer back on tos ++ // Save tos values before call_VM() clobbers them. Since we have ++ // to do it for every data type, we use the saved values as the ++ // jvalue object. ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ push_d(); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(x10); break; + -+ ResourceMark rm; -+ CodeBuffer cb(s->entry_point(), stub_code_length); -+ MacroAssembler* masm = new MacroAssembler(&cb); -+ assert_cond(masm != NULL); ++ default: ++ ShouldNotReachHere(); ++ } ++ __ mv(c_rarg3, esp); // points to jvalue on the stack ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); ++ __ verify_oop(x9); ++ // x9: object pointer copied above ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ x9, c_rarg2, c_rarg3); + -+#if (!defined(PRODUCT) && defined(COMPILER2)) -+ if (CountCompiledCalls) { -+ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(x18)); ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; ++ default: break; ++ } ++ __ bind(L2); + } -+#endif ++} + -+ // get receiver (need to skip return address on top of stack) -+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++void TemplateTable::fast_storefield(TosState state) ++{ ++ transition(state, vtos); + -+ // Entry arguments: -+ // t2: CompiledICHolder -+ // j_rarg0: Receiver ++ ByteSize base = ConstantPoolCache::base_offset(); + -+ // This stub is called from compiled code which has no callee-saved registers, -+ // so all registers except arguments are free at this point. 
-+ const Register recv_klass_reg = x18; -+ const Register holder_klass_reg = x19; // declaring interface klass (DECC) -+ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) -+ const Register temp_reg = x28; -+ const Register temp_reg2 = x29; -+ const Register icholder_reg = t1; ++ jvmti_post_fast_field_mod(); + -+ Label L_no_such_interface; ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); + -+ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); -+ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); + -+ start_pc = __ pc(); ++ // test for volatile with x13 ++ __ lwu(x13, Address(x12, in_bytes(base + ++ ConstantPoolCacheEntry::flags_offset()))); + -+ // get receiver klass (also an implicit null-check) -+ address npe_addr = __ pc(); -+ __ load_klass(recv_klass_reg, j_rarg0); ++ // replace index with field offset from cache entry ++ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + -+ // Receiver subtype check against REFC. -+ __ lookup_interface_method(// inputs: rec. class, interface -+ recv_klass_reg, resolved_klass_reg, noreg, -+ // outputs: scan temp. reg1, scan temp. reg2 -+ temp_reg2, temp_reg, -+ L_no_such_interface, -+ /*return_method=*/false); ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+ const ptrdiff_t typecheckSize = __ pc() - start_pc; -+ start_pc = __ pc(); ++ // Get object from stack ++ pop_and_check_object(x12); + -+ // Get selected method from declaring class and itable index -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ recv_klass_reg, holder_klass_reg, itable_index, -+ // outputs: method, scan temp. reg -+ xmethod, temp_reg, -+ L_no_such_interface); ++ // field address ++ __ add(x11, x12, x11); ++ const Address field(x11, 0); + -+ const ptrdiff_t lookupSize = __ pc() - start_pc; ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_zputfield: ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } + -+ // Reduce "estimate" such that "padding" does not drop below 8. 
-+ const ptrdiff_t estimate = 256; -+ const ptrdiff_t codesize = typecheckSize + lookupSize; -+ slop_delta = (int)(estimate - codesize); -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); ++ } ++} + -+#ifdef ASSERT -+ if (DebugVtables) { -+ Label L2; -+ __ beqz(xmethod, L2); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ bnez(t0, L2); -+ __ stop("compiler entrypoint is null"); -+ __ bind(L2); ++void TemplateTable::fast_accessfield(TosState state) ++{ ++ transition(atos, state); ++ // Do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x12, Address(t0, offset)); ++ __ beqz(x12, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); ++ __ verify_oop(x10); ++ __ push_ptr(x10); // save object pointer before call_VM() clobbers it ++ __ mv(c_rarg1, x10); ++ // c_rarg1: object pointer copied above ++ // c_rarg2: cache entry pointer ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2); ++ __ pop_ptr(x10); // restore object pointer ++ __ bind(L1); + } -+#endif // ASSERT + -+ // xmethod: Method* -+ // j_rarg0: receiver -+ address ame_addr = __ pc(); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ jr(t0); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); + -+ __ bind(L_no_such_interface); -+ // Handle IncompatibleClassChangeError in itable stubs. -+ // More detailed error message. -+ // We force resolving of the call site by jumping to the "handle -+ // wrong method" stub, and so let the interpreter runtime do all the -+ // dirty work. -+ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); + -+ masm->flush(); -+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); + -+ return s; -+} ++ // x10: object ++ __ verify_oop(x10); ++ __ null_check(x10); ++ __ add(x11, x10, x11); ++ const Address field(x11, 0); + -+int VtableStub::pd_code_alignment() { -+ // riscv cache line size is 64 bytes, but we want to limit alignment loss. 
-+ const unsigned int icache_line_size = wordSize; -+ return icache_line_size; ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } +} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209..3b836fe6b 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390"); + - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index e335f473d..53ad912cb 100644 ---- a/src/hotspot/cpu/s390/s390.ad -+++ b/src/hotspot/cpu/s390/s390.ad -@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) { - // BUT: make sure match rule is not disabled by a false predicate! - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // TODO - // Identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - int Matcher::regnum_to_fpu_offset(int regnum) { -@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) { - return Node::NotAMachineReg; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // z/Architecture does support misaligned store/load at minimal extra cost. - const bool Matcher::misaligned_vectors_ok() { - return true; -diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index 7a2798a51..7d9b17b44 100644 ---- a/src/hotspot/cpu/sparc/sparc.ad -+++ b/src/hotspot/cpu/sparc/sparc.ad -@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} ++void TemplateTable::fast_xaccess(TosState state) ++{ ++ transition(vtos, state); + - // SPARC doesn't support misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return false; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4..d38c63600 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86"); ++ // get receiver ++ __ ld(x10, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x13, 2); ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); + - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -index 82fd8522b..8016d328a 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register - bind(DONE_LABEL); - } // string_indexof_char - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { -+ ShortBranchVerifier sbv(this); -+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); -+ -+ int stride = 16; -+ -+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, -+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, -+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, -+ 
FOUND_SEQ_CHAR, DONE_LABEL; -+ -+ movptr(result, str1); -+ if (UseAVX >= 2) { -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT); -+ cmpl(cnt1, stride*2); -+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); -+ vpxor(vec2, vec2); -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFE0); //vector count (in chars) -+ andl(cnt1,0x0000001F); //tail count (in chars) -+ -+ bind(SCAN_TO_32_CHAR_LOOP); -+ vmovdqu(vec3, Address(result, 0)); -+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); -+ vptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 32); -+ subl(tmp, stride*2); -+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); -+ jmp(SCAN_TO_16_CHAR); -+ -+ bind(SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ -+ bind(SCAN_TO_16_CHAR); -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left -+ if (UseAVX < 2) { -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes) -+ andl(cnt1,0x0000000F); //tail count (in bytes) -+ -+ bind(SCAN_TO_16_CHAR_LOOP); -+ movdqu(vec3, Address(result, 0)); -+ pcmpeqb(vec3, vec1); -+ ptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 16); -+ subl(tmp, stride); -+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... -+ -+ bind(SCAN_TO_CHAR_INIT); -+ testl(cnt1, cnt1); -+ jcc(Assembler::zero, RET_NOT_FOUND); -+ bind(SCAN_TO_CHAR_LOOP); -+ load_unsigned_byte(tmp, Address(result, 0)); -+ cmpl(ch, tmp); -+ jccb(Assembler::equal, FOUND_SEQ_CHAR); -+ addptr(result, 1); -+ subl(cnt1, 1); -+ jccb(Assembler::zero, RET_NOT_FOUND); -+ jmp(SCAN_TO_CHAR_LOOP); -+ -+ bind(RET_NOT_FOUND); -+ movl(result, -1); -+ jmpb(DONE_LABEL); -+ -+ bind(FOUND_CHAR); -+ if (UseAVX >= 2) { -+ vpmovmskb(tmp, vec3); -+ } else { -+ pmovmskb(tmp, vec3); ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi(xbcp, xbcp, 1); ++ __ null_check(x10); ++ switch (state) { ++ case itos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case atos: ++ __ add(x10, x10, x11); ++ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case ftos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ bsfl(ch, tmp); -+ addptr(result, ch); -+ -+ bind(FOUND_SEQ_CHAR); -+ subptr(result, str1); + -+ bind(DONE_LABEL); -+} // stringL_indexof_char -+ - // helper function for string_compare - void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, - Address::ScaleFactor scale, Address::ScaleFactor scale1, -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -index 1bed0cce9..47a062c11 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -@@ -1659,6 +1659,8 @@ public: - #ifdef COMPILER2 - void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, - XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); -+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, 
Register tmp); - - // IndexOf strings. - // Small strings are loaded through stack if they cross page boundary. -diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index baa7cc774..238d8729b 100644 ---- a/src/hotspot/cpu/x86/x86.ad -+++ b/src/hotspot/cpu/x86/x86.ad -@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) { - return ret_value; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen - bool ret_value = match_rule_supported(opcode); -+ if (!vector_size_supported(bt, vlen)) { -+ ret_value = false; -+ } - if (ret_value) { - switch (opcode) { - case Op_AbsVB: -@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) { - return MIN2(size,max_size); - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} ++ { ++ Label notVolatile; ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; ++ __ sub(xbcp, xbcp, 1); +} + ++//----------------------------------------------------------------------------- ++// Calls + - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad -index bc9947327..bbe49bd62 100644 ---- a/src/hotspot/cpu/x86/x86_32.ad -+++ b/src/hotspot/cpu/x86/x86_32.ad -@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2 - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n) -> encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL 
cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast array equals - instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 7e6739ffe..53f887ea6 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -2975,7 +2975,7 @@ frame - RAX_H_num // Op_RegL - }; - // Excluded flags and vector registers. -- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); -+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); - return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); - %} - %} -@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) -+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) - %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr) -+%{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast string equals - instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, - legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr) -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e..6c79d20a4 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { - {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, - {EM_68K, 
EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, - {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, -- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, -+#ifdef _LP64 -+ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"}, -+#else -+ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"}, -+#endif - {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, - }; - -@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { - strncpy(cpuinfo, "IA64", length); - #elif defined(PPC) - strncpy(cpuinfo, "PPC64", length); -+#elif defined(RISCV) -+ strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length); - #elif defined(S390) - strncpy(cpuinfo, "S390", length); - #elif defined(SPARC) -@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -- SPARC_ONLY(4 * M); -+ SPARC_ONLY(4 * M) -+ RISCV64_ONLY(2 * M); - #endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); -diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -new file mode 100644 -index 000000000..961fff011 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == x13, ""); ++ assert(recv == noreg || recv == x12, ""); + -+#include "vm_version_riscv.hpp" ++ // setup registers & access constant pool cache ++ if (recv == noreg) { ++ recv = x12; ++ } ++ if (flags == noreg) { ++ flags = x13; ++ } ++ assert_different_registers(method, index, recv, flags); + -+// Implementation of class atomic -+// Note that memory_order_conservative requires a full barrier after atomic stores. -+// See https://patchwork.kernel.org/patch/3575821/ ++ // save 'interpreter return address' ++ __ save_bcp(); + -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + -+template -+struct Atomic::PlatformAdd -+ : public Atomic::AddAndFetch > -+{ -+ template -+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { -+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); -+ FULL_MEM_BARRIER; -+ return res; ++ // maybe push appendix to arguments (just before return address) ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); ++ __ beqz(t0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ __ push_reg(x9); ++ __ mv(x9, index); ++ __ load_resolved_reference_at_index(index, x9); ++ __ pop_reg(x9); ++ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); + } -+}; -+ -+template -+template -+inline T Atomic::PlatformXchg::operator()(T exchange_value, -+ T volatile* dest, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(byte_size == sizeof(T)); -+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); -+ FULL_MEM_BARRIER; -+ return res; -+} + -+// No direct support for cmpxchg of bytes; emulate using int. 
-+template -+template -+inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest, -+ T compare_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(byte_size == sizeof(T)); -+ T value = compare_value; -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; ++ // load receiver if needed (note: no return address pushed yet) ++ if (load_receiver) { ++ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 ++ __ shadd(t0, recv, esp, t0, 3); ++ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); ++ __ verify_oop(recv); + } + -+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false, -+ __ATOMIC_RELAXED, __ATOMIC_RELAXED); ++ // compute return type ++ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 + -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; ++ // load return address ++ { ++ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ mv(t0, table_addr); ++ __ shadd(t0, t1, t0, t1, 3); ++ __ ld(ra, Address(t0, 0)); + } -+ return value; +} + -+template<> -+template -+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest, -+ T compare_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(4 == sizeof(T)); -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ T rv; -+ int tmp; -+ __asm volatile( -+ "1:\n\t" -+ " addiw %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend -+ " lr.w.aq %[rv], (%[dest])\n\t" -+ " bne %[rv], %[tmp], 2f\n\t" -+ " sc.w.rl %[tmp], %[ev], (%[dest])\n\t" -+ " bnez %[tmp], 1b\n\t" -+ "2:\n\t" -+ : [rv] "=&r" (rv), [tmp] "=&r" (tmp) -+ : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) -+ : "memory"); -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ return rv; -+} ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) ++{ ++ // Uses temporary registers x10, x13 ++ assert_different_registers(index, recv, x10, x13); ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notFinal); + -+#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -new file mode 100644 -index 000000000..44f04d1a9 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ ++ const Register method = index; // method must be xmethod ++ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + -+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++ // do the call - the index is actually the method to call ++ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + -+#include ++ // It's final, need a null check here! ++ __ null_check(recv); + -+// Efficient swapping of data bytes from Java byte -+// ordering to native byte ordering and vice versa. -+inline u2 Bytes::swap_u2(u2 x) { -+ return bswap_16(x); -+} ++ // profile this call ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, method, x14, true); + -+inline u4 Bytes::swap_u4(u4 x) { -+ return bswap_32(x); -+} ++ __ jump_from_interpreted(method); + -+inline u8 Bytes::swap_u8(u8 x) { -+ return bswap_64(x); -+} ++ __ bind(notFinal); + -+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -new file mode 100644 -index 000000000..645b40a7c ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,116 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x10, recv); + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++ // profile this call ++ __ profile_virtual_call(x10, xlocals, x13); + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); ++ // get target Method & entry point ++ __ lookup_virtual_method(x10, index, method); ++ __ profile_arguments_type(x13, method, x14, true); ++ __ jump_from_interpreted(method); +} + -+static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ if(is_atomic) { -+ while (count-- > 0) { *to++ = *from++; } -+ } else { -+ memcpy(to, from, count * HeapWordSize); -+ } -+ } -+} ++void TemplateTable::invokevirtual(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, false); -+} ++ prepare_invoke(byte_no, xmethod, noreg, x12, x13); + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, true); -+} ++ // xmethod: index (actually a Method*) ++ // x12: receiver ++ // x13: flags + -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); ++ invokevirtual_helper(xmethod, x12, x13); +} + -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} ++void TemplateTable::invokespecial(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); ++ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* ++ x12); // get receiver also for null check ++ __ verify_oop(x12); ++ __ null_check(x12); ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, xbcp, false); ++ __ jump_from_interpreted(xmethod); +} + -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} ++void TemplateTable::invokestatic(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this arugment"); + -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); ++ prepare_invoke(byte_no, xmethod); // get f1 Method* ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, false); ++ __ jump_from_interpreted(xmethod); +} + -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); ++void TemplateTable::fast_invokevfinal(int byte_no) ++{ ++ __ call_Unimplemented(); +} + -+static void 
pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); -+} ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} ++ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* ++ x12, x13); // recv, flags + -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); -+} ++ // x10: interface klass (from f1) ++ // xmethod: method (from f2) ++ // x12: receiver ++ // x13: flags + -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} ++ // First check for Object case, then private interface method, ++ // then regular interface method. + -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); -+} ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details ++ Label notObjectMethod; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); ++ __ beqz(t0, notObjectMethod); + -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} ++ invokevirtual_helper(xmethod, x12, x13); ++ __ bind(notObjectMethod); + -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} ++ Label no_such_interface; + -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp -new file mode 100644 -index 000000000..041cdf4ff ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notVFinal); + -+#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++ // Check receiver klass into x13 - also a null check ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); + -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) ++ Label subtype; ++ __ check_klass_subtype(x13, x10, x14, subtype); ++ // If we get here the typecheck failed ++ __ j(no_such_interface); ++ __ bind(subtype); + -+define_pd_global(bool, DontYieldALot, false); -+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default -+define_pd_global(intx, VMThreadStackSize, 2048); ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, true); ++ __ jump_from_interpreted(xmethod); + -+define_pd_global(intx, CompilerThreadStackSize, 2048); ++ __ bind(notVFinal); + -+define_pd_global(uintx, JVMInvokeMethodSlack, 8192); ++ // Get receiver klass into x13 - also a null check ++ __ restore_locals(); ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); + -+// Used on 64 bit platforms for UseCompressedOops base address -+define_pd_global(uintx, HeapBaseMinAddress, 2 * G); ++ Label no_such_method; + -+#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -new file mode 100644 -index 000000000..842aa51e0 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Preserve method for the throw_AbstractMethodErrorVerbose. ++ __ mv(x28, xmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in x10. Subklass in x13. Blows t1, x30 ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ x13, x10, noreg, ++ // outputs: scan temp. reg, scan temp. 
reg ++ t1, x30, ++ no_such_interface, ++ /*return_method=*/false); + -+#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++ // profile this call ++ __ profile_virtual_call(x13, x30, x9); + -+// Included in orderAccess.hpp header file. ++ // Get declaring interface class from method, and itable index ++ __ load_method_holder(x10, xmethod); ++ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); ++ __ subw(xmethod, xmethod, Method::itable_index_max); ++ __ negw(xmethod, xmethod); + -+#include "vm_version_riscv.hpp" ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose ++ __ mv(xlocals, x13); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ xlocals, x10, xmethod, ++ // outputs: method, scan temp. reg ++ xmethod, x30, ++ no_such_interface); + -+// Implementation of class OrderAccess. ++ // xmethod: Method to call ++ // x12: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beqz(xmethod, no_such_method); + -+inline void OrderAccess::loadload() { acquire(); } -+inline void OrderAccess::storestore() { release(); } -+inline void OrderAccess::loadstore() { acquire(); } -+inline void OrderAccess::storeload() { fence(); } ++ __ profile_arguments_type(x13, xmethod, x30, true); + -+inline void OrderAccess::acquire() { -+ READ_MEM_BARRIER; -+} ++ // do the call ++ // x12: receiver ++ // xmethod: Method ++ __ jump_from_interpreted(xmethod); ++ __ should_not_reach_here(); + -+inline void OrderAccess::release() { -+ WRITE_MEM_BARRIER; -+} ++ // exception handling code follows ... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! + -+inline void OrderAccess::fence() { -+ FULL_MEM_BARRIER; ++ __ bind(no_such_method); ++ // throw exception ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exceptiong ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); ++ return; +} + -+template -+struct OrderAccess::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } -+}; ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } -+}; ++ prepare_invoke(byte_no, xmethod, x10, x12); ++ __ verify_method_ptr(x12); ++ __ verify_oop(x12); ++ __ null_check(x12); + -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } -+}; ++ // FIXME: profile the LambdaForm also + -+#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -new file mode 100644 -index 000000000..37947701b ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,628 @@ -+/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // x30 is safe to use here as a temp reg because it is about to ++ // be clobbered by jump_from_interpreted(). 
++ __ profile_final_call(x30); ++ __ profile_arguments_type(x30, xmethod, x14, true); + -+// no precompiled headers -+#include "asm/macroAssembler.hpp" -+#include "classfile/classLoader.hpp" -+#include "classfile/systemDictionary.hpp" -+#include "classfile/vmSymbols.hpp" -+#include "code/codeCache.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nativeInst.hpp" -+#include "code/vtableStubs.hpp" -+#include "interpreter/interpreter.hpp" -+#include "jvm.h" -+#include "memory/allocation.inline.hpp" -+#include "os_share_linux.hpp" -+#include "prims/jniFastGetField.hpp" -+#include "prims/jvm_misc.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/extendedPC.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/java.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/osThread.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "runtime/timer.hpp" -+#include "utilities/debug.hpp" -+#include "utilities/events.hpp" -+#include "utilities/vmError.hpp" ++ __ jump_from_interpreted(xmethod); ++} + -+// put OS-includes here -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include ++void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+#define REG_LR 1 -+#define REG_FP 8 ++ prepare_invoke(byte_no, xmethod, x10); + -+NOINLINE address os::current_stack_pointer() { -+ return (address)__builtin_frame_address(0); -+} ++ // x10: CallSite object (from cpool->resolved_references[]) ++ // xmethod: MH.linkToCallSite method (from f2) + -+char* os::non_memory_address_word() { -+ // Must never look like an address returned by reserve_memory, -+ return (char*) -1; -+} ++ // Note: x10_callsite is already pushed by prepare_invoke + -+address os::Linux::ucontext_get_pc(const ucontext_t * uc) { -+ return (address)uc->uc_mcontext.__gregs[REG_PC]; -+} ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(xbcp); ++ __ profile_arguments_type(x13, xmethod, x30, false); + -+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { -+ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; -+} ++ __ verify_oop(x10); + -+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++ __ jump_from_interpreted(xmethod); +} + -+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; -+} ++//----------------------------------------------------------------------------- ++// Allocation + -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread -+// is currently interrupted by SIGPROF. -+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal -+// frames. Currently we don't do that on Linux, so it's the same as -+// os::fetch_frame_from_context(). 
-+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, -+ intptr_t** ret_sp, -+ intptr_t** ret_fp) { ++void TemplateTable::_new() { ++ transition(vtos, atos); + -+ assert(thread != NULL, "just checking"); -+ assert(ret_sp != NULL, "just checking"); -+ assert(ret_fp != NULL, "just checking"); ++ __ get_unsigned_2_byte_index_at_bcp(x13, 1); ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields + -+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); -+} ++ __ get_cpool_and_tags(x14, x10); ++ // Make sure the class we're about to instantiate has been resolved. ++ // This is done before loading InstanceKlass to be consistent with the order ++ // how Constant Pool is update (see ConstantPool::klass_at_put) ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add(t0, x10, x13); ++ __ la(t0, Address(t0, tags_offset)); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(t0, t0); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t1, t0, (u1)JVM_CONSTANT_Class); ++ __ bnez(t1, slow_case); + -+ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { ++ // get InstanceKlass ++ __ load_resolved_klass_at_offset(x14, x13, x14, t0); + -+ ExtendedPC epc; -+ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); ++ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); ++ __ bnez(t1, slow_case); + -+ if (uc != NULL) { -+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); -+ if (ret_sp != NULL) { -+ *ret_sp = os::Linux::ucontext_get_sp(uc); -+ } -+ if (ret_fp != NULL) { -+ *ret_fp = os::Linux::ucontext_get_fp(uc); -+ } -+ } else { -+ // construct empty ExtendedPC for return value checking -+ epc = ExtendedPC(NULL); -+ if (ret_sp != NULL) { -+ *ret_sp = (intptr_t *)NULL; -+ } -+ if (ret_fp != NULL) { -+ *ret_fp = (intptr_t *)NULL; -+ } -+ } ++ // get instance_size in InstanceKlass (scaled to a count of bytes) ++ __ lwu(x13, Address(x14, Klass::layout_helper_offset())); ++ // test to see if it has a finalizer or is malformed in some way ++ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); ++ __ bnez(t0, slow_case); + -+ return epc; -+} ++ // Allocate the instance: ++ // If TLAB is enabled: ++ // Try to allocate in the TLAB. ++ // If fails, go to the slow path. ++ // Else If inline contiguous allocations are enabled: ++ // Try to allocate in eden. ++ // If fails due to heap end, go to slow path ++ // ++ // If TLAB is enabled OR inline contiguous is enabled: ++ // Initialize the allocation. ++ // Exit. ++ // Go to slow path. ++ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); + -+frame os::fetch_frame_from_context(const void* ucVoid) { -+ intptr_t* frame_sp = NULL; -+ intptr_t* frame_fp = NULL; -+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc.pc()); -+} ++ if (UseTLAB) { ++ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); + -+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { -+ address pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (Interpreter::contains(pc)) { -+ // interpreter performs stack banging after the fixed frame header has -+ // been generated while the compilers perform it before. 
To maintain -+ // semantic consistency between interpreted and compiled frames, the -+ // method returns the Java sender of the current frame. -+ *fr = os::fetch_frame_from_context(uc); -+ if (!fr->is_first_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ *fr = fr->java_sender(); ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ j(initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ j(initialize_object); + } + } else { -+ // more complex code with compiled code -+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); -+ CodeBlob* cb = CodeCache::find_blob(pc); -+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { -+ // Not sure where the pc points to, fallback to default -+ // stack overflow handling -+ return false; -+ } else { -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. -+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - -+ NativeInstruction::instruction_size); -+ *fr = frame(frame_sp, frame_fp, frame_pc); -+ if (!fr->is_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ assert(!fr->is_first_frame(), "Safety check"); -+ *fr = fr->java_sender(); -+ } ++ // Allocation in the shared Eden, if allowed. ++ // ++ // x13: instance size in bytes ++ if (allow_shared_alloc) { ++ __ eden_allocate(x10, x13, 0, x28, slow_case); + } + } -+ assert(fr->is_java_frame(), "Safety check"); -+ return true; -+} + -+// By default, gcc always saves frame pointer rfp on this stack. This -+// may get turned off by -fomit-frame-pointer. -+frame os::get_sender_for_C_frame(frame* fr) { -+ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); -+} ++ // If USETLAB or allow_shared_alloc are true, the object is created above and ++ // there is an initialized need. Otherwise, skip and go to the slow path. ++ if (UseTLAB || allow_shared_alloc) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ sub(x13, x13, sizeof(oopDesc)); ++ __ beqz(x13, initialize_header); + -+NOINLINE frame os::current_frame() { -+ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); -+ if(sender_sp != NULL) { -+ frame myframe((intptr_t*)os::current_stack_pointer(), -+ sender_sp[frame::link_offset], -+ CAST_FROM_FN_PTR(address, os::current_frame)); -+ if (os::is_first_C_frame(&myframe)) { -+ // stack is not walkable -+ return frame(); -+ } else { -+ return os::get_sender_for_C_frame(&myframe); ++ // Initialize object fields ++ { ++ __ add(x12, x10, sizeof(oopDesc)); ++ Label loop; ++ __ bind(loop); ++ __ sd(zr, Address(x12)); ++ __ add(x12, x12, BytesPerLong); ++ __ sub(x13, x13, BytesPerLong); ++ __ bnez(x13, loop); ++ } -+ } else { -+ ShouldNotReachHere(); -+ return frame(); -+ } -+} -+ -+// Utility functions -+extern "C" JNIEXPORT int -+JVM_handle_linux_signal(int sig, -+ siginfo_t* info, -+ void* ucVoid, -+ int abort_if_unrecognized) { -+ ucontext_t* uc = (ucontext_t*) ucVoid; -+ -+ Thread* t = Thread::current_or_null_safe(); -+ -+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away -+ // (no destructors can be run) -+ os::ThreadCrashProtection::check_crash_protection(sig, t); -+ -+ SignalHandlerMark shm(t); -+ -+ // Note: it's not uncommon that JNI code uses signal/sigset to install -+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, -+ // or have a SIGILL handler when detecting CPU type). When that happens, -+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To -+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals -+ // that do not require siginfo/ucontext first. + -+ if (sig == SIGPIPE || sig == SIGXFSZ) { -+ // allow chained handler to go first -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } else { -+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 -+ return true; -+ } -+ } ++ // initialize object header only.
++ __ bind(initialize_header); ++ __ mv(t0, (intptr_t)markWord::prototype().value()); ++ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); ++ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops ++ __ store_klass(x10, x14); // store klass last + -+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT -+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { -+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { -+ return 1; ++ { ++ SkipIfEqual skip(_masm, &DTraceAllocProbes, false); ++ // Trigger dtrace event for fastpath ++ __ push(atos); // save the return value ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); ++ __ pop(atos); // restore the return value + } ++ __ j(done); + } -+#endif + -+ JavaThread* thread = NULL; -+ VMThread* vmthread = NULL; -+ if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ) { -+ if(t->is_Java_thread()) { -+ thread = (JavaThread*)t; -+ } else if(t->is_VM_thread()) { -+ vmthread = (VMThread *)t; -+ } -+ } -+ } ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(c_rarg1); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); ++ __ verify_oop(x10); + -+ // Handle SafeFetch faults -+ if (uc != NULL) { -+ address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (StubRoutines::is_safefetch_fault(pc)) { -+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); -+ return 1; -+ } -+ } ++ // continue ++ __ bind(done); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); ++} + -+ // decide if this trap can be handled by a stub -+ address stub = NULL; ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ load_unsigned_byte(c_rarg1, at_bcp(1)); ++ __ mv(c_rarg2, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), ++ c_rarg1, c_rarg2); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); ++} + -+ address pc = NULL; ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ __ get_constant_pool(c_rarg1); ++ __ mv(c_rarg3, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), ++ c_rarg1, c_rarg2, c_rarg3); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. 
++ __ membar(MacroAssembler::StoreStore); ++} + -+ //%note os_trap_1 -+ if (info != NULL && uc != NULL && thread != NULL) { -+ pc = (address) os::Linux::ucontext_get_pc(uc); ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); ++ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); ++} + -+ // Handle ALL stack overflow variations here -+ if (sig == SIGSEGV) { -+ address addr = (address) info->si_addr; ++void TemplateTable::checkcast() ++{ ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); + -+ // check if fault address is within thread stack -+ if (thread->on_local_stack(addr)) { -+ // stack overflow -+ if (thread->in_stack_yellow_reserved_zone(addr)) { -+ if (thread->thread_state() == _thread_in_Java) { -+ if (thread->in_stack_reserved_zone(addr)) { -+ frame fr; -+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { -+ assert(fr.is_java_frame(), "Must be a Java frame"); -+ frame activation = -+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); -+ if (activation.sp() != NULL) { -+ thread->disable_stack_reserved_zone(); -+ if (activation.is_interpreted_frame()) { -+ thread->set_reserved_stack_activation((address)( -+ activation.fp() + frame::interpreter_frame_initial_sp_offset)); -+ } else { -+ thread->set_reserved_stack_activation((address)activation.unextended_sp()); -+ } -+ return 1; -+ } -+ } -+ } -+ // Throw a stack overflow exception. Guard pages will be reenabled -+ // while unwinding the stack. -+ thread->disable_stack_yellow_reserved_zone(); -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); -+ } else { -+ // Thread was in the vm or native code. Return and try to finish. -+ thread->disable_stack_yellow_reserved_zone(); -+ return 1; -+ } -+ } else if (thread->in_stack_red_zone(addr)) { -+ // Fatal red zone violation. Disable the guard pages and fall through -+ // to handle_unexpected_exception way down below. -+ thread->disable_stack_red_zone(); -+ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); + -+ // This is a likely cause, but hard to verify. Let's just print -+ // it as a hint. -+ tty->print_raw_cr("Please check if any of your loaded .so files has " -+ "enabled executable stack (see man page execstack(8))"); -+ } else { -+ // Accessing stack address below sp may cause SEGV if current -+ // thread has MAP_GROWSDOWN stack. This should only happen when -+ // current thread was created by user code with MAP_GROWSDOWN flag -+ // and then attached to VM. See notes in os_linux.cpp. -+ if (thread->osthread()->expanding_stack() == 0) { -+ thread->osthread()->set_expanding_stack(); -+ if (os::Linux::manually_expand_stack(thread, addr)) { -+ thread->osthread()->clear_expanding_stack(); -+ return 1; -+ } -+ thread->osthread()->clear_expanding_stack(); -+ } else { -+ fatal("recursive segv. 
expanding stack."); -+ } -+ } -+ } -+ } ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ j(resolved); + -+ if (thread->thread_state() == _thread_in_Java) { -+ // Java thread running in Java code => find exception handler if any -+ // a fault inside compiled code, the interpreter, or a stub ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass + -+ // Handle signal from NativeJump::patch_verified_entry(). -+ if ((sig == SIGILL || sig == SIGTRAP) -+ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { -+ if (TraceTraps) { -+ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); -+ } -+ stub = SharedRuntime::get_handle_wrong_method_stub(); -+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { -+ stub = SharedRuntime::get_poll_stub(pc); -+ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { -+ // BugId 4454115: A read from a MappedByteBuffer can fault -+ // here if the underlying file has been truncated. -+ // Do not crash the VM in such a case. -+ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); -+ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -+ if (nm != NULL && nm->has_unsafe_access()) { -+ address next_pc = pc + NativeCall::instruction_size; -+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); -+ } -+ } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { -+ stub = -+ SharedRuntime:: -+ continuation_for_implicit_exception(thread, -+ pc, -+ SharedRuntime:: -+ IMPLICIT_DIVIDE_BY_ZERO); -+ } else if (sig == SIGSEGV && -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { -+ // Determination of interpreter/vtable stub/compiled code null exception -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); -+ } -+ } else if (thread->thread_state() == _thread_in_vm && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { -+ address next_pc = pc + NativeCall::instruction_size; -+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); -+ } ++ __ bind(resolved); ++ __ load_klass(x9, x13); + -+ // jni_fast_GetField can trap at certain pc's if a GC kicks in -+ // and the heap gets shrunk before the field access. -+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; -+ } -+ } ++ // Generate subtype check. Blows x12, x15. Object in x13. ++ // Superklass in x10. Subklass in x9. ++ __ gen_subtype_check(x9, ok_is_subtype); + -+ // Check to see if we caught the safepoint code in the -+ // process of write protecting the memory serialization page. -+ // It write enables the page immediately after protecting it -+ // so we can just return to retry the write. -+ if ((sig == SIGSEGV) && -+ os::is_memory_serialize_page(thread, (address) info->si_addr)) { -+ // Block current thread until the memory serialize page permission restored. 
-+ os::block_on_serialize_page_trap(); -+ return true; -+ } -+ } ++ // Come here on failure ++ __ push_reg(x13); ++ // object is at TOS ++ __ j(Interpreter::_throw_ClassCastException_entry); + -+ if (stub != NULL) { -+ // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ mv(x10, x13); // Restore object in x13 + -+ os::Linux::ucontext_set_pc(uc, stub); -+ return true; ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' + } ++ __ bind(done); ++} + -+ // signal-chaining -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); + -+ if (!abort_if_unrecognized) { -+ // caller wants another chance, so give it to him -+ return false; -+ } ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); + -+ if (pc == NULL && uc != NULL) { -+ pc = os::Linux::ucontext_get_pc(uc); -+ } ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ verify_oop(x13); ++ __ load_klass(x13, x13); ++ __ j(resolved); + -+ // unmask current signal -+ sigset_t newset; -+ sigemptyset(&newset); -+ sigaddset(&newset, sig); -+ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ load_klass(x13, x10); ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); + -+ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ __ bind(resolved); + -+ ShouldNotReachHere(); -+ return true; // Mute compiler -+} ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x13. ++ __ gen_subtype_check(x13, ok_is_subtype); + -+void os::Linux::init_thread_fpu_state(void) { -+} ++ // Come here on failure ++ __ mv(x10, zr); ++ __ j(done); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(x10, 1); + -+int os::Linux::get_fpu_control_word(void) { -+ return 0; ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass ++ // x10 = 1: obj != NULL and obj is an instanceof the specified klass +} + -+void os::Linux::set_fpu_control_word(int fpu_control) { -+} ++//----------------------------------------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
+ ++ transition(vtos, vtos); + -+//////////////////////////////////////////////////////////////////////////////// -+// thread stack ++ // get the unpatched byte code ++ __ get_method(c_rarg1); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ c_rarg1, xbcp); ++ __ mv(x9, x10); + -+// Minimum usable stack sizes required to get to user code. Space for -+// HotSpot guard pages is added later. -+size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; -+size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; -+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; ++ // post the breakpoint event ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ++ xmethod, xbcp); + -+// return default stack size for thr_type -+size_t os::Posix::default_stack_size(os::ThreadType thr_type) { -+ // default stack size (compiler thread needs larger stack) -+ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); -+ return s; ++ // complete the execution of original bytecode ++ __ mv(t0, x9); ++ __ dispatch_only_normal(vtos); +} + -+///////////////////////////////////////////////////////////////////////////// -+// helper functions for fatal error handler ++//----------------------------------------------------------------------------- ++// Exceptions + -+static const char* reg_abi_names[] = { -+ "pc", -+ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", -+ "x5(t0)", "x6(t1)", "x7(t2)", -+ "x8(s0)", "x9(s1)", -+ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", -+ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", -+ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" -+}; ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(x10); ++ __ j(Interpreter::throw_exception_entry()); ++} + -+void os::print_context(outputStream *st, const void *context) { -+ if (context == NULL) { -+ return; -+ } ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- esp = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [saved fp ] <--- fp ++void TemplateTable::monitorenter() ++{ ++ transition(atos, vtos); + -+ const ucontext_t *uc = (const ucontext_t*)context; -+ st->print_cr("Registers:"); -+ for (int r = 0; r < 32; r++) { -+ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); -+ print_location(st, uc->uc_mcontext.__gregs[r]); -+ } -+ st->cr(); ++ // check for NULL object ++ __ null_check(x10); + -+ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); -+ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); -+ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); -+ st->cr(); ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // Note: it may be unsafe to inspect memory near pc. For example, pc may -+ // point to garbage if entry point in an nmethod is corrupted. 
Leave -+ // this at the end, and hope for the best. -+ address pc = os::Linux::ucontext_get_pc(uc); -+ print_instructions(st, pc, sizeof(char)); -+ st->cr(); -+} ++ Label allocated; + -+void os::print_register_info(outputStream *st, const void *context) { -+ if (context == NULL) { -+ return; -+ } ++ // initialize entry pointer ++ __ mv(c_rarg1, zr); // points to free slot or NULL + -+ const ucontext_t *uc = (const ucontext_t*)context; ++ // find a free slot in the monitor block (result in c_rarg1) ++ { ++ Label entry, loop, exit, notUsed; ++ __ ld(c_rarg3, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom + -+ st->print_cr("Register to memory mapping:"); -+ st->cr(); ++ __ j(entry); + -+ // this is horrendously verbose but the layout of the registers in the -+ // context does not match how we defined our abstract Register set, so -+ // we can't just iterate through the gregs area ++ __ bind(loop); ++ // check if current entry is used ++ // if not used then remember entry in c_rarg1 ++ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t0, notUsed); ++ __ mv(c_rarg1, c_rarg3); ++ __ bind(notUsed); ++ // check if current entry is for same object ++ // if same object then stop searching ++ __ beq(x10, t0, exit); ++ // otherwise advance to next entry ++ __ add(c_rarg3, c_rarg3, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg3, c_rarg2, loop); ++ __ bind(exit); ++ } + -+ // this is only for the "general purpose" registers ++ __ bnez(c_rarg1, allocated); // check if a slot has been found and ++ // if found, continue with that on + -+ for (int r = 0; r < 32; r++) -+ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); -+ st->cr(); -+} ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // esp: old expression stack top ++ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom ++ __ sub(esp, esp, entry_size); // move expression stack top ++ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom ++ __ mv(c_rarg3, esp); // set start value for copy loop ++ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom ++ __ sub(sp, sp, entry_size); // make room for the monitor + -+void os::setup_fpu() { -+} ++ __ j(entry); ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack ++ // word from old location ++ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location ++ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word ++ __ bind(entry); ++ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom ++ // then copy next word ++ } + -+#ifndef PRODUCT -+void os::verify_stack_alignment() { -+ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); -+} -+#endif ++ // call run-time routine ++ // c_rarg1: points to monitor entry ++ __ bind(allocated); + -+int os::extra_bang_size_in_bytes() { -+ return 0; ++ // Increment bcp to point to the next bytecode, so exception ++ // handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. 
++ __ addi(xbcp, xbcp, 1); ++ ++ // store object ++ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ lock_object(c_rarg1); ++ ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ ++ // The bcp has already been incremented. Just need to dispatch to ++ // next instruction. ++ __ dispatch_next(vtos); +} + -+extern "C" { -+ int SpinPause() { -+ return 0; -+ } ++void TemplateTable::monitorexit() ++{ ++ transition(atos, vtos); + -+ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ if (from > to) { -+ const jshort *end = from + count; -+ while (from < end) { -+ *(to++) = *(from++); -+ } -+ } else if (from < to) { -+ const jshort *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ *(to--) = *(from--); -+ } -+ } -+ } -+ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ if (from > to) { -+ const jint *end = from + count; -+ while (from < end) { -+ *(to++) = *(from++); -+ } -+ } else if (from < to) { -+ const jint *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ *(to--) = *(from--); -+ } -+ } -+ } -+ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ if (from > to) { -+ const jlong *end = from + count; -+ while (from < end) { -+ os::atomic_copy64(from++, to++); -+ } -+ } else if (from < to) { -+ const jlong *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ os::atomic_copy64(from--, to--); -+ } -+ } -+ } -+ -+ void _Copy_arrayof_conjoint_bytes(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count); -+ } -+ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 2); -+ } -+ void _Copy_arrayof_conjoint_jints(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 4); -+ } -+ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 8); -+ } -+}; -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -new file mode 100644 -index 000000000..eae1635b0 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // check for NULL object ++ __ null_check(x10); + -+#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ static void setup_fpu(); ++ Label found; + -+ // Used to register dynamic code cache area with the OS -+ // Note: Currently only used in 64 bit Windows implementations -+ static bool register_code_area(char *low, char *high) { return true; } ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg1, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ // of monitor block ++ __ j(entry); + -+ // Atomically copy 64 bits of data -+ static void atomic_copy64(const volatile void *src, volatile void *dst) { -+ *(jlong *) dst = *(const jlong *) src; ++ __ bind(loop); ++ // check if current entry is for same object ++ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ // if same object then stop searching ++ __ beq(x10, t0, found); ++ // otherwise advance to next entry ++ __ add(c_rarg1, c_rarg1, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg1, c_rarg2, loop); + } + -+#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp ++ // error handling. Unlocking was not block-structured ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ __ bind(found); ++ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) ++ __ unlock_object(c_rarg1); ++ __ pop_ptr(x10); // discard object ++} ++ ++// Wide instructions ++void TemplateTable::wide() ++{ ++ __ load_unsigned_byte(x9, at_bcp(1)); ++ __ mv(t0, (address)Interpreter::_wentry_point); ++ __ shadd(t0, x9, t0, t1, 3); ++ __ ld(t0, Address(t0)); ++ __ jr(t0); ++} ++ ++// Multi arrays ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); ++ __ sub(c_rarg1, c_rarg1, wordSize); ++ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), ++ c_rarg1); ++ __ load_unsigned_byte(x11, at_bcp(3)); ++ __ shadd(esp, x11, esp, t0, 3); ++} +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 000000000..82b9bb6fd +index 00000000000..fcc86108d28 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55710,27 +55715,30 @@ index 000000000..82b9bb6fd + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -+ -+#include "runtime/prefetch.hpp" -+ ++#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP + -+inline void Prefetch::read (void *loc, intx interval) { -+} ++static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++static void invokevirtual_helper(Register index, Register recv, ++ Register flags); + -+inline void Prefetch::write(void *loc, intx interval) { -+} ++// Helpers ++static void index_check(Register array, Register index); + -+#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp new file mode 100644 -index 000000000..c78096931 +index 00000000000..4f50adb05c3 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,103 @@ ++++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +@@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55755,91 +55763,21 @@ index 000000000..c78096931 + */ + +#include "precompiled.hpp" -+#include "memory/metaspaceShared.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/thread.inline.hpp" -+ -+frame JavaThread::pd_last_frame() { -+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); -+} -+ -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is -+// currently interrupted by SIGPROF -+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, -+ void* ucontext, bool isInJava) { -+ -+ assert(Thread::current() == this, "caller must be current thread"); -+ return pd_get_top_frame(fr_addr, ucontext, isInJava); -+} -+ -+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { -+ return pd_get_top_frame(fr_addr, ucontext, isInJava); -+} -+ -+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { -+ assert(this->is_Java_thread(), "must be JavaThread"); -+ JavaThread* jt = (JavaThread *)this; -+ -+ // If we have a last_Java_frame, then we should use it even if -+ // isInJava == true. It should be more reliable than ucontext info. 
-+ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { -+ *fr_addr = jt->pd_last_frame(); -+ return true; -+ } -+ -+ // At this point, we don't have a last_Java_frame, so -+ // we try to glean some information out of the ucontext -+ // if we were running Java code when SIGPROF came in. -+ if (isInJava) { -+ ucontext_t* uc = (ucontext_t*) ucontext; -+ -+ intptr_t* ret_fp = NULL; -+ intptr_t* ret_sp = NULL; -+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, -+ &ret_sp, &ret_fp); -+ if (addr.pc() == NULL || ret_sp == NULL ) { -+ // ucontext wasn't useful -+ return false; -+ } -+ -+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { -+ // In the middle of a trampoline call. Bail out for safety. -+ // This happens rarely so shouldn't affect profiling. -+ return false; -+ } -+ -+ frame ret_frame(ret_sp, ret_fp, addr.pc()); -+ if (!ret_frame.safe_for_sender(jt)) { -+#ifdef COMPILER2 -+ frame ret_frame2(ret_sp, NULL, addr.pc()); -+ if (!ret_frame2.safe_for_sender(jt)) { -+ // nothing else to try if the frame isn't good -+ return false; -+ } -+ ret_frame = ret_frame2; -+#else -+ // nothing else to try if the frame isn't good -+ return false; -+#endif /* COMPILER2 */ -+ } -+ *fr_addr = ret_frame; -+ return true; -+ } ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" + -+ // nothing else to try -+ return false; ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; +} -+ -+void JavaThread::cache_global_variables() { } -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp new file mode 100644 -index 000000000..657b98984 +index 00000000000..ce70da72f2e --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -0,0 +1,67 @@ ++++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55863,56 +55801,31 @@ index 000000000..657b98984 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -+ -+ private: -+ void pd_initialize() { -+ _anchor.clear(); -+ } -+ -+ frame pd_last_frame(); -+ -+ public: -+ // Mutators are highly dangerous.... 
-+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); } -+ -+ void set_base_of_stack_pointer(intptr_t* base_sp) { -+ } -+ -+ static ByteSize last_Java_fp_offset() { -+ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); -+ } -+ -+ intptr_t* base_of_stack_pointer() { -+ return NULL; -+ } -+ void record_base_of_stack_pointer() { -+ } -+ -+ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, -+ bool isInJava); ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" + -+ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); -+private: -+ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} + -+ // These routines are only used on cpu architectures that -+ // have separate register stacks (Itanium). -+ static bool register_stack_overflow() { return false; } -+ static void enable_register_stack_guard() {} -+ static void disable_register_stack_guard() {} ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} + -+#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} +diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 000000000..8ee443b5d +index 00000000000..6c89133de02 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp -@@ -0,0 +1,55 @@ ++++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55936,45 +55849,32 @@ index 000000000..8ee443b5d + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#define CPU_RISCV_VMSTRUCTS_RISCV_HPP + -+// These are the OS and CPU-specific fields, types and integer ++// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ -+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ \ -+ /******************************/ \ -+ /* Threads (NOTE: incomplete) */ \ -+ /******************************/ \ -+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ -+ nonstatic_field(OSThread, _pthread_id, pthread_t) -+ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + -+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ -+ \ -+ /**********************/ \ -+ /* Posix Thread IDs */ \ -+ /**********************/ \ -+ \ -+ declare_integer_type(OSThread::thread_id_t) \ -+ declare_unsigned_integer_type(pthread_t) ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + -+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp ++#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 000000000..ef9358aa0 +index 00000000000..768c7633ca6 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,116 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -0,0 +1,230 @@ +/* -+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -55998,2163 +55898,219 @@ index 000000000..ef9358aa0 + */ + +#include "precompiled.hpp" -+#include "asm/register.hpp" ++#include "runtime/java.hpp" +#include "runtime/os.hpp" -+#include "runtime/os.inline.hpp" +#include "runtime/vm_version.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" + -+#include -+#include ++#include OS_HEADER_INLINE(os) + -+#ifndef HWCAP_ISA_I -+#define HWCAP_ISA_I (1 << ('I' - 'A')) -+#endif ++const char* VM_Version::_uarch = ""; ++uint32_t VM_Version::_initial_vector_length = 0; + -+#ifndef HWCAP_ISA_M -+#define HWCAP_ISA_M (1 << ('M' - 'A')) -+#endif ++void VM_Version::initialize() { ++ get_os_cpu_info(); + -+#ifndef HWCAP_ISA_A -+#define HWCAP_ISA_A (1 << ('A' - 'A')) -+#endif ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } + -+#ifndef HWCAP_ISA_F -+#define HWCAP_ISA_F (1 << ('F' - 'A')) -+#endif ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); ++ } + -+#ifndef HWCAP_ISA_D -+#define HWCAP_ISA_D (1 << ('D' - 'A')) -+#endif ++ if (UseAES || UseAESIntrinsics) { ++ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } + -+#ifndef HWCAP_ISA_C -+#define HWCAP_ISA_C (1 << ('C' - 'A')) -+#endif ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } + -+#ifndef HWCAP_ISA_V -+#define HWCAP_ISA_V (1 << ('V' - 'A')) -+#endif ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } + -+#define read_csr(csr) \ -+({ \ -+ register unsigned long __v; \ -+ __asm__ __volatile__ ("csrr %0, %1" \ -+ : "=r" (__v) \ -+ : "i" (csr) \ -+ : "memory"); \ -+ __v; \ -+}) ++ if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } + -+uint32_t VM_Version::get_current_vector_length() { -+ assert(_features & CPU_V, "should not call this"); -+ return (uint32_t)read_csr(CSR_VLENB); -+} ++ if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } + -+void VM_Version::get_os_cpu_info() { ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } + -+ uint64_t auxv = getauxval(AT_HWCAP); ++ if (UseSHA3Intrinsics) { ++ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } + -+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); -+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); -+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); -+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); -+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); -+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); -+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ if (UseCRC32Intrinsics) { ++ warning("CRC32 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } + -+ if (FILE *f = fopen("/proc/cpuinfo", "r")) 
{ -+ char buf[512], *p; -+ while (fgets(buf, sizeof (buf), f) != NULL) { -+ if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { -+ char* uarch = os::strdup(p + 2); -+ uarch[strcspn(uarch, "\n")] = '\0'; -+ _uarch = uarch; -+ break; -+ } -+ } ++ if (UseCRC32CIntrinsics) { ++ warning("CRC32C intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ ++ if (UseMD5Intrinsics) { ++ warning("MD5 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); ++ } ++ ++ if (UseRVV) { ++ if (!(_features & CPU_V)) { ++ warning("RVV is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVV, false); ++ } else { ++ // read vector length from vector CSR vlenb ++ _initial_vector_length = get_current_vector_length(); + } -+ fclose(f); + } + -+ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. -+ // Availability for those extensions could not be queried from HWCAP. -+ // TODO: Add proper detection for those extensions. -+ _features = auxv & ( -+ HWCAP_ISA_I | -+ HWCAP_ISA_M | -+ HWCAP_ISA_A | -+ HWCAP_ISA_F | -+ HWCAP_ISA_D | -+ HWCAP_ISA_C | -+ HWCAP_ISA_V); -+} -diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp -index ba61aa4c0..4ca0b050b 100644 ---- a/src/hotspot/share/adlc/archDesc.cpp -+++ b/src/hotspot/share/adlc/archDesc.cpp -@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - // Match Vector types. - if (strncmp(idealOp, "Vec",3)==0) { - switch(last_char) { -+ case 'A': return "TypeVect::VECTA"; - case 'S': return "TypeVect::VECTS"; - case 'D': return "TypeVect::VECTD"; - case 'X': return "TypeVect::VECTX"; -@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - } - } - -+ if (strncmp(idealOp, "RegVMask", 8) == 0) { -+ return "Type::BOTTOM"; -+ } -+ - // !!!!! 
- switch(last_char) { - case 'I': return "TypeInt::INT"; -diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index f810fde76..2cf9636d1 100644 ---- a/src/hotspot/share/adlc/formssel.cpp -+++ b/src/hotspot/share/adlc/formssel.cpp -@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { - strcmp(opType,"RegL")==0 || - strcmp(opType,"RegF")==0 || - strcmp(opType,"RegD")==0 || -+ strcmp(opType,"RegVMask")==0 || -+ strcmp(opType,"VecA")==0 || - strcmp(opType,"VecS")==0 || - strcmp(opType,"VecD")==0 || - strcmp(opType,"VecX")==0 || -diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73..af54dddf3 100644 ---- a/src/hotspot/share/c1/c1_LIR.cpp -+++ b/src/hotspot/share/c1/c1_LIR.cpp -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { - #ifdef ASSERT - switch (code()) { -- case lir_cmove: - case lir_xchg: - break; - -@@ -252,30 +251,27 @@ void LIR_Op2::verify() const { - - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _label(block->label()) -+ , _type(type) - , _block(block) - , _ublock(NULL) - , _stub(NULL) { - } - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _label(stub->entry()) -+ , _type(type) - , _block(NULL) - , _ublock(NULL) - , _stub(stub) { - } - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _label(block->label()) -+ , _type(type) - , _block(block) - , _ublock(ublock) - , _stub(NULL) -@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { - } - - void LIR_OpBranch::negate_cond() { -- switch (_cond) { -- case lir_cond_equal: _cond = lir_cond_notEqual; break; -- case lir_cond_notEqual: _cond = lir_cond_equal; break; -- case lir_cond_less: _cond = lir_cond_greaterEqual; break; -- case lir_cond_lessEqual: _cond = lir_cond_greater; break; -- case lir_cond_greaterEqual: _cond = lir_cond_less; break; -- case lir_cond_greater: _cond = lir_cond_lessEqual; break; -+ switch (cond()) { -+ case lir_cond_equal: set_cond(lir_cond_notEqual); break; -+ case lir_cond_notEqual: set_cond(lir_cond_equal); break; -+ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; -+ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; -+ case lir_cond_greaterEqual: set_cond(lir_cond_less); break; -+ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; - default: ShouldNotReachHere(); - } - } -@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { - assert(op->as_OpBranch() != NULL, "must be"); - LIR_OpBranch* opBranch = (LIR_OpBranch*)op; - -+ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && -+ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && -+ opBranch->_tmp5->is_illegal(), "not used"); ++ if (UseRVB && !(_features & CPU_B)) { 
++ warning("RVB is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVB, false); ++ } + -+ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); -+ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++ if (UseRVC && !(_features & CPU_C)) { ++ warning("RVC is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVC, false); ++ } + - if (opBranch->_info != NULL) do_info(opBranch->_info); - assert(opBranch->_result->is_illegal(), "not used"); - if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +618,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { - // to the result operand, otherwise the backend fails - case lir_cmove: - { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; -+ assert(op->as_Op4() != NULL, "must be"); -+ LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); -+ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && -+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be"); -+ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); -+ do_input(op4->_opr1); -+ do_input(op4->_opr2); -+ if (op4->_opr3->is_valid()) do_input(op4->_opr3); -+ if (op4->_opr4->is_valid()) do_input(op4->_opr4); -+ do_temp(op4->_opr2); -+ do_output(op4->_result); - - break; - } -@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { - masm->emit_op3(this); - } - -+void LIR_Op4::emit_code(LIR_Assembler* masm) { -+ masm->emit_op4(this); -+} ++ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++ } + - void LIR_OpLock::emit_code(LIR_Assembler* masm) { - masm->emit_lock(this); - if (stub()) { -@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) - , _file(NULL) - , _line(0) - #endif -+#ifdef RISCV -+ , _cmp_opr1(LIR_OprFact::illegalOpr) -+ , _cmp_opr2(LIR_OprFact::illegalOpr) -+#endif - { } - - -@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { - } - #endif - -+#ifdef RISCV -+void LIR_List::set_cmp_oprs(LIR_Op* op) { -+ switch (op->code()) { -+ case lir_cmp: -+ _cmp_opr1 = op->as_Op2()->in_opr1(); -+ _cmp_opr2 = op->as_Op2()->in_opr2(); -+ break; -+ case lir_branch: // fall through -+ case lir_cond_float_branch: -+ assert(op->as_OpBranch()->cond() == lir_cond_always || -+ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), -+ "conditional branches must have legal operands"); -+ if (op->as_OpBranch()->cond() != lir_cond_always) { -+ op->as_Op2()->set_in_opr1(_cmp_opr1); -+ op->as_Op2()->set_in_opr2(_cmp_opr2); -+ } -+ break; -+ case lir_cmove: -+ op->as_Op4()->set_in_opr3(_cmp_opr1); -+ op->as_Op4()->set_in_opr4(_cmp_opr2); -+ break; -+#if INCLUDE_ZGC -+ case lir_zloadbarrier_test: -+ _cmp_opr1 = FrameMap::as_opr(t1); -+ _cmp_opr2 = LIR_OprFact::intConst(0); -+ break; -+#endif -+ default: -+ break; ++ if (UseRVB) { ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ } ++ } else { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } -+} -+#endif - - void 
LIR_List::append(LIR_InsertionBuffer* buffer) { - assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { - // LIR_OpBranch - void LIR_OpBranch::print_instr(outputStream* out) const { - print_condition(out, cond()); out->print(" "); -+ in_opr1()->print(out); out->print(" "); -+ in_opr2()->print(out); out->print(" "); - if (block() != NULL) { - out->print("[B%d] ", block()->block_id()); - } else if (stub() != NULL) { -@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { - - // LIR_Op2 - void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { -+ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { - print_condition(out, condition()); out->print(" "); - } - in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const { - result_opr()->print(out); - } - -+// LIR_Op4 -+void LIR_Op4::print_instr(outputStream* out) const { -+ print_condition(out, condition()); out->print(" "); -+ in_opr1()->print(out); out->print(" "); -+ in_opr2()->print(out); out->print(" "); -+ in_opr3()->print(out); out->print(" "); -+ in_opr4()->print(out); out->print(" "); -+ result_opr()->print(out); -+} - - void LIR_OpLock::print_instr(outputStream* out) const { - hdr_opr()->print(out); out->print(" "); -diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018..88cd3b24e 100644 ---- a/src/hotspot/share/c1/c1_LIR.hpp -+++ b/src/hotspot/share/c1/c1_LIR.hpp -@@ -864,9 +864,11 @@ class LIR_OpConvert; - class LIR_OpAllocObj; - class LIR_OpRoundFP; - class LIR_Op2; --class LIR_OpDelay; -+class LIR_OpBranch; -+class LIR_OpDelay; - class LIR_Op3; - class LIR_OpAllocArray; -+class LIR_Op4; - class LIR_OpCall; - class LIR_OpJavaCall; - class LIR_OpRTCall; -@@ -916,8 +918,6 @@ enum LIR_Code { - , lir_null_check - , lir_return - , lir_leal -- , lir_branch -- , lir_cond_float_branch - , lir_move - , lir_convert - , lir_alloc_object -@@ -929,11 +929,12 @@ enum LIR_Code { - , lir_unwind - , end_op1 - , begin_op2 -+ , lir_branch -+ , lir_cond_float_branch - , lir_cmp - , lir_cmp_l2i - , lir_ucmp_fd2i - , lir_cmp_fd2i -- , lir_cmove - , lir_add - , lir_sub - , lir_mul -@@ -964,6 +965,9 @@ enum LIR_Code { - , lir_fmad - , lir_fmaf - , end_op3 -+ , begin_op4 -+ , lir_cmove -+ , end_op4 - , begin_opJavaCall - , lir_static_call - , lir_optvirtual_call -@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj { - virtual LIR_Op1* as_Op1() { return NULL; } - virtual LIR_Op2* as_Op2() { return NULL; } - virtual LIR_Op3* as_Op3() { return NULL; } -+ virtual LIR_Op4* as_Op4() { return NULL; } - virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } - virtual LIR_OpUpdateCRC32* 
as_OpUpdateCRC32() { return NULL; } - virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall { - virtual void verify() const; - }; - -- --class LIR_OpBranch: public LIR_Op { -- friend class LIR_OpVisitState; -- -- private: -- LIR_Condition _cond; -- BasicType _type; -- Label* _label; -- BlockBegin* _block; // if this is a branch to a block, this is the block -- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block -- CodeStub* _stub; // if this is a branch to a stub, this is the stub -- -- public: -- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -- , _cond(cond) -- , _type(type) -- , _label(lbl) -- , _block(NULL) -- , _ublock(NULL) -- , _stub(NULL) { } -- -- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); -- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); -- -- // for unordered comparisons -- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); -- -- LIR_Condition cond() const { return _cond; } -- BasicType type() const { return _type; } -- Label* label() const { return _label; } -- BlockBegin* block() const { return _block; } -- BlockBegin* ublock() const { return _ublock; } -- CodeStub* stub() const { return _stub; } -- -- void change_block(BlockBegin* b); -- void change_ublock(BlockBegin* b); -- void negate_cond(); -- -- virtual void emit_code(LIR_Assembler* masm); -- virtual LIR_OpBranch* as_OpBranch() { return this; } -- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; --}; -- -- - class ConversionStub; - - class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op { - void verify() const; - - public: -- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) -+ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) - : LIR_Op(code, LIR_OprFact::illegalOpr, info) - , _opr1(opr1) - , _opr2(opr2) -- , _type(T_ILLEGAL) -- , _condition(condition) -+ , _type(type) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) - , _tmp4(LIR_OprFact::illegalOpr) -- , _tmp5(LIR_OprFact::illegalOpr) { -- assert(code == lir_cmp || code == lir_assert, "code check"); -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(condition) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) -@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(lir_cond_unknown) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) - , _tmp4(LIR_OprFact::illegalOpr) -- , _tmp5(LIR_OprFact::illegalOpr) { -- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && 
code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(T_ILLEGAL) -- , _condition(lir_cond_unknown) - , _fpu_stack_size(0) - , _tmp1(tmp1) - , _tmp2(tmp2) - , _tmp3(tmp3) - , _tmp4(tmp4) -- , _tmp5(tmp5) { -- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); -+ , _tmp5(tmp5) -+ , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op { - LIR_Opr tmp4_opr() const { return _tmp4; } - LIR_Opr tmp5_opr() const { return _tmp5; } - LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; -+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; - } - void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; -+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; - } - - void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op { - virtual void print_instr(outputStream* out) const PRODUCT_RETURN; - }; - -+class LIR_OpBranch: public LIR_Op2 { -+ friend class LIR_OpVisitState; + -+ private: -+ BasicType _type; -+ Label* _label; -+ BlockBegin* _block; // if this is a branch to a block, this is the block -+ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block -+ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ char buf[512]; ++ buf[0] = '\0'; ++ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); ++ strcat(buf, "rv64"); ++#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); ++ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) ++#undef ADD_FEATURE_IF_SUPPORTED + -+ public: -+ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -+ , _label(lbl) -+ , _type(type) -+ , _block(NULL) -+ , _ublock(NULL) -+ , _stub(NULL) { } ++ _features_string = os::strdup(buf); + -+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); -+ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); ++#ifdef COMPILER2 ++ c2_initialize(); ++#endif // COMPILER2 ++} + -+ // for unordered comparisons -+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); ++#ifdef COMPILER2 ++void VM_Version::c2_initialize() { ++ if (UseCMoveUnconditionally) { ++ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); ++ } + -+ LIR_Condition cond() const { -+ return condition(); ++ if (ConditionalMoveLimit > 0) { ++ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); + } + -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); + } + -+ Label* label() 
const { return _label; } -+ BlockBegin* block() const { return _block; } -+ BlockBegin* ublock() const { return _ublock; } -+ CodeStub* stub() const { return _stub; } ++ if (!UseRVV && MaxVectorSize) { ++ FLAG_SET_DEFAULT(MaxVectorSize, 0); ++ } + -+ void change_block(BlockBegin* b); -+ void change_ublock(BlockBegin* b); -+ void negate_cond(); ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); ++ } + -+ virtual void emit_code(LIR_Assembler* masm); -+ virtual LIR_OpBranch* as_OpBranch() { return this; } -+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; -+}; ++ if (UseRVV) { ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = _initial_vector_length; ++ } else if (MaxVectorSize < 16) { ++ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); ++ UseRVV = false; ++ } else if (is_power_of_2(MaxVectorSize)) { ++ if (MaxVectorSize > _initial_vector_length) { ++ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d", ++ _initial_vector_length, _initial_vector_length); ++ } ++ MaxVectorSize = _initial_vector_length; ++ } else { ++ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); ++ } ++ } + - class LIR_OpAllocArray : public LIR_Op { - friend class LIR_OpVisitState; - -@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op { - virtual void print_instr(outputStream* out) const PRODUCT_RETURN; - }; - -+class LIR_Op4: public LIR_Op { -+ friend class LIR_OpVisitState; -+ protected: -+ LIR_Opr _opr1; -+ LIR_Opr _opr2; -+ LIR_Opr _opr3; -+ LIR_Opr _opr4; -+ BasicType _type; -+ LIR_Opr _tmp1; -+ LIR_Opr _tmp2; -+ LIR_Opr _tmp3; -+ LIR_Opr _tmp4; -+ LIR_Opr _tmp5; -+ LIR_Condition _condition; ++ // disable prefetch ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ } + -+ public: -+ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, -+ LIR_Opr result, BasicType type) -+ : LIR_Op(code, result, NULL) -+ , _opr1(opr1) -+ , _opr2(opr2) -+ , _opr3(opr3) -+ , _opr4(opr4) -+ , _type(type) -+ , _condition(condition) -+ , _tmp1(LIR_OprFact::illegalOpr) -+ , _tmp2(LIR_OprFact::illegalOpr) -+ , _tmp3(LIR_OprFact::illegalOpr) -+ , _tmp4(LIR_OprFact::illegalOpr) -+ , _tmp5(LIR_OprFact::illegalOpr) { -+ assert(code == lir_cmove, "code check"); -+ assert(type != T_ILLEGAL, "cmove should have type"); ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + -+ LIR_Opr in_opr1() const { return _opr1; } -+ LIR_Opr in_opr2() const { return _opr2; } -+ LIR_Opr in_opr3() const { return _opr3; } -+ LIR_Opr in_opr4() const { return _opr4; } -+ BasicType type() const { return _type; } -+ LIR_Opr tmp1_opr() const { return _tmp1; } -+ LIR_Opr tmp2_opr() const { return _tmp2; } -+ LIR_Opr tmp3_opr() const { return _tmp3; } -+ LIR_Opr tmp4_opr() const { return _tmp4; } -+ LIR_Opr tmp5_opr() const { return _tmp5; } ++ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ } + -+ LIR_Condition condition() const { return _condition; } -+ void set_condition(LIR_Condition condition) { _condition = condition; } ++ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); ++ } + -+ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } -+ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } -+ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } -+ void set_in_opr4(LIR_Opr 
opr) { _opr4 = opr; } -+ virtual void emit_code(LIR_Assembler* masm); -+ virtual LIR_Op4* as_Op4() { return this; } ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } + -+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; -+}; - - //-------------------------------- - class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj { - const char * _file; - int _line; - #endif -+#ifdef RISCV -+ LIR_Opr _cmp_opr1; -+ LIR_Opr _cmp_opr2; -+#endif - - public: - void append(LIR_Op* op) { -@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj { - } - #endif // PRODUCT - -+#ifdef RISCV -+ set_cmp_oprs(op); -+ // lir_cmp set cmp oprs only on riscv -+ if (op->code() == lir_cmp) return; -+#endif -+ - _operations.append(op); - - #ifdef ASSERT -@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj { - void set_file_and_line(const char * file, int line); - #endif - -+#ifdef RISCV -+ void set_cmp_oprs(LIR_Op* op); -+#endif -+ - //---------- accessors --------------- - LIR_OpList* instructions_list() { return &_operations; } - int length() const { return _operations.length(); } -@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj { - void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); - void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); - -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); -+ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, -+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { -+ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); - } - - void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, -diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f..42a0350f7 100644 ---- a/src/hotspot/share/c1/c1_LIRAssembler.cpp -+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { - comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); - break; - -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- - case lir_shl: - case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { - } - } - -+void LIR_Assembler::emit_op4(LIR_Op4* op) { -+ switch(op->code()) { -+ case lir_cmove: -+ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); -+ break; -+ -+ default: -+ Unimplemented(); -+ break; -+ } -+} - - void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); -diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe5..406a58d21 100644 ---- a/src/hotspot/share/c1/c1_LIRAssembler.hpp -+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { - void emit_op1(LIR_Op1* op); - void emit_op2(LIR_Op2* op); - void emit_op3(LIR_Op3* op); -+ void emit_op4(LIR_Op4* op); - void emit_opBranch(LIR_OpBranch* op); - void emit_opLabel(LIR_OpLabel* op); - void 
emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { - void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); - void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions - void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); -+ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); - - void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); - void ic_call( LIR_OpJavaCall* op); -diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd9..d00bfe91a 100644 ---- a/src/hotspot/share/c1/c1_LinearScan.cpp -+++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1242,8 +1242,8 @@ void LinearScan::add_register_hints(LIR_Op* op) { - break; - } - case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; -+ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); -+ LIR_Op4* cmove = (LIR_Op4*)op; - - LIR_Opr move_from = cmove->in_opr1(); - LIR_Opr move_to = cmove->result_opr(); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { - } - } - -+#ifndef RISCV -+ // Disable these optimizations on riscv temporarily, because it does not -+ // work when the comparison operands are bound to branches or cmoves. - { TIME_LINEAR_SCAN(timer_optimize_lir); - - EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { - // check that cfg is still correct after optimizations - ir()->verify(); - } -+#endif - - NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); - NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { - // There might be a cmove inserted for profiling which depends on the same - // compare. If we change the condition of the respective compare, we have - // to take care of this cmove as well. 
-- LIR_Op2* prev_cmove = NULL; -+ LIR_Op4* prev_cmove = NULL; - - for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { - prev_op = instructions->at(j); - // check for the cmove - if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; -+ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); -+ prev_cmove = (LIR_Op4*)prev_op; - assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); - } - if (prev_op->code() == lir_cmp) { -diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp -index 19fe196bc..d9cb8e999 100644 ---- a/src/hotspot/share/classfile/vmSymbols.cpp -+++ b/src/hotspot/share/classfile/vmSymbols.cpp -@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_compareToL: - case vmIntrinsics::_compareToU: - case vmIntrinsics::_compareToLU: -@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - if (!SpecialStringIndexOf) return true; - break; - case vmIntrinsics::_equalsL: -diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp -index cef3f530c..a31525003 100644 ---- a/src/hotspot/share/classfile/vmSymbols.hpp -+++ b/src/hotspot/share/classfile/vmSymbols.hpp -@@ -946,6 +946,7 @@ - do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \ -+ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \ - do_name( indexOf_name, "indexOf") \ - do_name( indexOfChar_name, "indexOfChar") \ - do_name( indexOfUL_name, "indexOfLatin1") \ -diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b86..295f82ccc 100644 ---- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -@@ -31,7 +31,7 @@ - #include "utilities/defaultStream.hpp" - - void ShenandoahArguments::initialize() { --#if !(defined AARCH64 || defined AMD64 || defined IA32) -+#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) - vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); - #endif - -diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a5..ff16de0e7 100644 ---- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -+++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { - inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { - #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) - return true; --#elif defined(SPARC) || defined(ARM) || defined(AARCH64) -+#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) - return false; - #else - #warning "Unconfigured platform" -diff --git 
a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp -index 7768615b7..ef006f087 100644 ---- a/src/hotspot/share/opto/c2compiler.cpp -+++ b/src/hotspot/share/opto/c2compiler.cpp -@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_toBytesStringU: - case vmIntrinsics::_getCharsStringU: - case vmIntrinsics::_getCharStringU: -diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp -index 500054218..fafbde78d 100644 ---- a/src/hotspot/share/opto/chaitin.cpp -+++ b/src/hotspot/share/opto/chaitin.cpp -@@ -77,6 +77,7 @@ void LRG::dump() const { - if( _is_oop ) tty->print("Oop "); - if( _is_float ) tty->print("Float "); - if( _is_vector ) tty->print("Vector "); -+ if( _is_scalable ) tty->print("Scalable "); - if( _was_spilled1 ) tty->print("Spilled "); - if( _was_spilled2 ) tty->print("Spilled2 "); - if( _direct_conflict ) tty->print("Direct_conflict "); -@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() { - - // Merge multidefs if multiple defs representing the same value are used in a single block. - merge_multidefs(); -+ merge_debugdefs(); - - #ifdef ASSERT - // Veify the graph after RA. -@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() { - // Live ranges record the highest register in their mask. - // We want the low register for the AD file writer's convenience. - OptoReg::Name hi = lrg.reg(); // Get hi register -- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo -+ int num_regs = lrg.num_regs(); -+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { -+ // For scalable vector registers, when they are allocated in physical -+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable -+ // vector. If they are allocated on stack, we need to get the actual -+ // num_regs, which reflects the physical length of scalable registers. -+ num_regs = lrg.scalable_reg_slots(); -+ } -+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo - // We have to use pair [lo,lo+1] even for wide vectors because - // the rest of code generation works only with pairs. It is safe - // since for registers encoding only 'lo' is used. -@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - // Check for vector live range (only if vector register is used). - // On SPARC vector uses RegD which could be misaligned so it is not - // processes as vector in RA. -- if (RegMask::is_vector(ireg)) -+ if (RegMask::is_vector(ireg)) { - lrg._is_vector = 1; -+ if (ireg == Op_VecA) { -+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); -+ lrg._is_scalable = 1; -+ // For scalable vector, when it is allocated in physical register, -+ // num_regs is RegMask::SlotsPerVecA for reg mask, -+ // which may not be the actual physical register size. -+ // If it is allocated in stack, we need to get the actual -+ // physical length of scalable vector register. 
-+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); -+ } -+ } - assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, - "vector must be in vector registers"); - -@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - lrg.set_reg_pressure(1); - #endif - break; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); -+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); -+ lrg.set_num_regs(RegMask::SlotsPerVecA); -+ lrg.set_reg_pressure(1); -+ break; - case Op_VecS: - assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); - assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); -@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { - return false; - } - -+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { -+ int num_regs = lrg.num_regs(); -+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); -+ -+ if (lrg.is_scalable()) { -+ // a physical register is found -+ if (chunk == 0 && OptoReg::is_reg(assigned)) { -+ return assigned; -+ } -+ -+ // find available stack slots for scalable register -+ if (lrg._is_vector) { -+ num_regs = lrg.scalable_reg_slots(); -+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits -+ if (num_regs == RegMask::SlotsPerVecA) { -+ return assigned; -+ } -+ -+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it -+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits -+ // instead of SlotsPerVecA bits. -+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg -+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { -+ // Verify the found reg has scalable_reg_slots() bits set. -+ if (mask.is_valid_reg(assigned, num_regs)) { -+ return assigned; -+ } else { -+ // Remove more for each iteration -+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg -+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits -+ assigned = mask.find_first_set(lrg, num_regs); -+ } -+ } -+ return OptoReg::Bad; // will cause chunk change, and retry next chunk -+ } -+ } -+ -+ return assigned; -+} -+ -+ - // Choose a color using the biasing heuristic - OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - -@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - RegMask tempmask = lrg.mask(); - tempmask.AND(lrgs(copy_lrg).mask()); - tempmask.clear_to_sets(lrg.num_regs()); -- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); -+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); - if (OptoReg::is_valid(reg)) - return reg; - } -@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - // If no bias info exists, just go with the register selection ordering - if (lrg._is_vector || lrg.num_regs() == 2) { - // Find an aligned set -- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); -+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); - } - - // CNC - Fun hack. Alternate 1st and 2nd selection. 
Enables post-allocate -@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) { - int n_regs = lrg->num_regs(); - assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); - if (n_regs == 1 || !lrg->_fat_proj) { -- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ if (Matcher::supports_scalable_vector()) { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); -+ } else { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ } - lrg->Clear(); // Clear the mask - lrg->Insert(reg); // Set regmask to match selected reg - // For vectors and pairs, also insert the low bit of the pair -- for (int i = 1; i < n_regs; i++) -+ // We always choose the high bit, then mask the low bits by register size -+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack -+ n_regs = lrg->scalable_reg_slots(); -+ } -+ for (int i = 1; i < n_regs; i++) { - lrg->Insert(OptoReg::add(reg,-i)); -+ } - lrg->set_mask_size(n_regs); - } else { // Else fatproj - // mask must be equal to fatproj bits, by definition -diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp -index e5be5b966..b5d1b0604 100644 ---- a/src/hotspot/share/opto/chaitin.hpp -+++ b/src/hotspot/share/opto/chaitin.hpp -@@ -115,9 +115,11 @@ public: - _msize_valid=1; - if (_is_vector) { - assert(!_fat_proj, "sanity"); -- _mask.verify_sets(_num_regs); -+ if (!(_is_scalable && OptoReg::is_stack(_reg))) { -+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); -+ } - } else if (_num_regs == 2 && !_fat_proj) { -- _mask.verify_pairs(); -+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); - } - #endif - } -@@ -143,10 +145,34 @@ public: - private: - uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else - // except _num_regs is kill count for fat_proj -+ -+ // For scalable register, num_regs may not be the actual physical register size. -+ // We need to get the actual physical length of scalable register when scalable -+ // register is spilled. The size of one slot is 32-bit. -+ uint _scalable_reg_slots; // Actual scalable register length of slots. -+ // Meaningful only when _is_scalable is true. - public: - int num_regs() const { return _num_regs; } - void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } - -+ uint scalable_reg_slots() { return _scalable_reg_slots; } -+ void set_scalable_reg_slots(uint slots) { -+ assert(_is_scalable, "scalable register"); -+ assert(slots > 0, "slots of scalable register is not valid"); -+ _scalable_reg_slots = slots; -+ } -+ -+ bool is_scalable() { -+#ifdef ASSERT -+ if (_is_scalable) { -+ // Should only be a vector for now, but it could also be a RegVMask in future. -+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); -+ } -+#endif -+ return _is_scalable; -+ } -+ -+ - private: - // Number of physical registers this live range uses when it colors - // Architecture and register-set dependent -@@ -172,6 +198,7 @@ public: - uint _is_oop:1, // Live-range holds an oop - _is_float:1, // True if in float registers - _is_vector:1, // True if in vector registers -+ _is_scalable:1, // True if register size is scalable - _was_spilled1:1, // True if prior spilling on def - _was_spilled2:1, // True if twice prior spilling on def - _is_bound:1, // live range starts life with no -@@ -756,6 +783,7 @@ private: - - // Merge nodes that are a part of a multidef lrg and produce the same value within a block. 
- void merge_multidefs(); -+ void merge_debugdefs(); - - private: - -diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp -index c0dfe1b0c..2d9526a39 100644 ---- a/src/hotspot/share/opto/intrinsicnode.hpp -+++ b/src/hotspot/share/opto/intrinsicnode.hpp -@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node { - // Base class for Ideal nodes used in String intrinsic code. - class StrIntrinsicNode: public Node { - public: -- // Possible encodings of the two parameters passed to the string intrinsic. -+ // Possible encodings of the parameters passed to the string intrinsic. - // 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that - // the first string is Latin1 encoded and the second string is UTF16 encoded. -- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc; -+ // 'L' means that the single string is Latin1 encoded -+ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc; - - protected: - // Encoding of strings. Used to select the right version of the intrinsic. -diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp -index 6b6aa9e9b..8719c5b12 100644 ---- a/src/hotspot/share/opto/library_call.cpp -+++ b/src/hotspot/share/opto/library_call.cpp -@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit { - bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae); - Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count, - RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae); -- bool inline_string_indexOfChar(); -+ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae); - bool inline_string_equals(StrIntrinsicNode::ArgEnc ae); - bool inline_string_toBytesU(); - bool inline_string_getCharsU(); -@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { - case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL); - case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU); - case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL); -- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(); -+ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U); -+ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L); - - case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL); - case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU); -@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node* - } - - //-----------------------------inline_string_indexOfChar----------------------- --bool LibraryCallKit::inline_string_indexOfChar() { -+bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) { - if (too_many_traps(Deoptimization::Reason_intrinsic)) { - return false; - } -@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() { - - src = must_be_not_null(src, true); - -- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1))); -+ Node* src_offset = ae == StrIntrinsicNode::L ? 
from_index : _gvn.transform(new LShiftINode(from_index, intcon(1))); - Node* src_start = array_element_address(src, src_offset, T_BYTE); - Node* src_count = _gvn.transform(new SubINode(max, from_index)); - - // Range checks -- generate_string_range_check(src, src_offset, src_count, true); -+ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U); - if (stopped()) { - return true; - } -@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() { - RegionNode* region = new RegionNode(3); - Node* phi = new PhiNode(region, TypeInt::INT); - -- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none); -+ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae); - C->set_has_split_ifs(true); // Has chance for split-if optimization - _gvn.transform(result); - -diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp -index 8d526b15d..92b4f7158 100644 ---- a/src/hotspot/share/opto/machnode.cpp -+++ b/src/hotspot/share/opto/machnode.cpp -@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const { - return MachNode::emit_size(ra_); - } - --//------------------------------size------------------------------------------- -+//-------------------------emit_size------------------------------------------- - // Helper function that computes size by emitting code - uint MachNode::emit_size(PhaseRegAlloc *ra_) const { - // Emit into a trash buffer and count bytes emitted. -diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index a52325680..dad70565b 100644 ---- a/src/hotspot/share/opto/machnode.hpp -+++ b/src/hotspot/share/opto/machnode.hpp -@@ -334,6 +334,10 @@ public: - // Top-level ideal Opcode matched - virtual int ideal_Opcode() const { return Op_Node; } - -+ virtual bool is_Opcode_equal(Node* node) { -+ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode()); -+ } -+ - // Adds the label for the case - virtual void add_case_label( int switch_val, Label* blockLabel); - -diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 9e9b3383f..97de5e314 100644 ---- a/src/hotspot/share/opto/matcher.cpp -+++ b/src/hotspot/share/opto/matcher.cpp -@@ -84,6 +84,7 @@ Matcher::Matcher() - idealreg2spillmask [Op_RegF] = NULL; - idealreg2spillmask [Op_RegD] = NULL; - idealreg2spillmask [Op_RegP] = NULL; -+ idealreg2spillmask [Op_VecA] = NULL; - idealreg2spillmask [Op_VecS] = NULL; - idealreg2spillmask [Op_VecD] = NULL; - idealreg2spillmask [Op_VecX] = NULL; -@@ -110,6 +111,7 @@ Matcher::Matcher() - idealreg2mhdebugmask[Op_RegF] = NULL; - idealreg2mhdebugmask[Op_RegD] = NULL; - idealreg2mhdebugmask[Op_RegP] = NULL; -+ idealreg2mhdebugmask[Op_VecA] = NULL; - idealreg2mhdebugmask[Op_VecS] = NULL; - idealreg2mhdebugmask[Op_VecD] = NULL; - idealreg2mhdebugmask[Op_VecX] = NULL; -@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { - void Matcher::init_first_stack_mask() { - - // Allocate storage for spill masks as masks for the appropriate load type. 
-- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); -+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6)); - - idealreg2spillmask [Op_RegN] = &rms[0]; - idealreg2spillmask [Op_RegI] = &rms[1]; -@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() { - idealreg2mhdebugmask[Op_RegD] = &rms[16]; - idealreg2mhdebugmask[Op_RegP] = &rms[17]; - -- idealreg2spillmask [Op_VecS] = &rms[18]; -- idealreg2spillmask [Op_VecD] = &rms[19]; -- idealreg2spillmask [Op_VecX] = &rms[20]; -- idealreg2spillmask [Op_VecY] = &rms[21]; -- idealreg2spillmask [Op_VecZ] = &rms[22]; -+ idealreg2spillmask [Op_VecA] = &rms[18]; -+ idealreg2spillmask [Op_VecS] = &rms[19]; -+ idealreg2spillmask [Op_VecD] = &rms[20]; -+ idealreg2spillmask [Op_VecX] = &rms[21]; -+ idealreg2spillmask [Op_VecY] = &rms[22]; -+ idealreg2spillmask [Op_VecZ] = &rms[23]; - - OptoReg::Name i; - -@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() { - // Keep spill masks aligned. - aligned_stack_mask.clear_to_pairs(); - assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); -+ RegMask scalable_stack_mask = aligned_stack_mask; - - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - #ifdef _LP64 -@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() { - *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; - idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); - } -+ -+ if (Matcher::supports_scalable_vector()) { -+ int k = 1; -+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); -+ // Exclude last input arg stack slots to avoid spilling vector register there, -+ // otherwise vector spills could stomp over stack slots in caller frame. -+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { -+ scalable_stack_mask.Remove(in); -+ in = OptoReg::add(in, -1); -+ } -+ -+ // For VecA -+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); -+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); -+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; -+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); -+ } else { -+ *idealreg2spillmask[Op_VecA] = RegMask::Empty; -+ } -+ - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. -@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) { - idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); - - // Vector regmasks. -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));; -+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); -+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); -+ } - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE, 4); - MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); -diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index 244e3d1f8..9a8307102 100644 ---- a/src/hotspot/share/opto/matcher.hpp -+++ b/src/hotspot/share/opto/matcher.hpp -@@ -310,7 +310,7 @@ public: - - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen -- static const bool match_rule_supported_vector(int opcode, int vlen); -+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void); -@@ -333,6 +333,10 @@ public: - Matcher::min_vector_size(bt) <= size); - } - -+ static const bool supports_scalable_vector(); -+ // Actual max scalable vector register length. -+ static const int scalable_vector_reg_size(const BasicType bt); -+ - // Vector ideal reg - static const uint vector_ideal_reg(int len); - static const uint vector_shift_count_ideal_reg(int len); -diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index 02bb6bb16..99d51ba05 100644 ---- a/src/hotspot/share/opto/node.cpp -+++ b/src/hotspot/share/opto/node.cpp -@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) { - return NULL; - } - -+//--------------------------is_similar----------------------------------- -+// True if a node has the same opcode and inputs as "this". -+bool Node::is_similar(Node* node) { -+ if (this == node) { -+ return true; -+ } else { -+ if (is_Opcode_equal(node) && (req() == node->req())) { -+ for (uint i = 0; i < node->req(); i++) { -+ if (in(i) != node->in(i)) { -+ return false; -+ } -+ } -+ return true; -+ } -+ } -+ return false; -+} -+ -+bool Node::is_Opcode_equal(Node* node) { -+ return Opcode() == node->Opcode(); -+} - - //--------------------------unique_ctrl_out------------------------------ - // Return the unique control out if only one. Null if none or more than one. -diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 0c0b9bf69..e24456d85 100644 ---- a/src/hotspot/share/opto/node.hpp -+++ b/src/hotspot/share/opto/node.hpp -@@ -1030,6 +1030,11 @@ public: - // be found; Otherwise return NULL; - Node* find_similar(int opc); - -+ // True if a node has the same opcode and inputs as "this". -+ bool is_similar(Node* node); -+ -+ virtual bool is_Opcode_equal(Node* node); -+ - // Return the unique control out if only one. Null if none or more than one. 
- Node* unique_ctrl_out() const; - -diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp -index e31e8d847..aa0483c73 100644 ---- a/src/hotspot/share/opto/opcodes.cpp -+++ b/src/hotspot/share/opto/opcodes.cpp -@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { - "RegF", - "RegD", - "RegL", -- "RegFlags", -+ "VecA", - "VecS", - "VecD", - "VecX", - "VecY", - "VecZ", -+ "RegVMask", -+ "RegFlags", - "_last_machine_leaf", - #include "classes.hpp" - "_last_class_name", -diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp -index ae3d61ce0..0a77c3732 100644 ---- a/src/hotspot/share/opto/opcodes.hpp -+++ b/src/hotspot/share/opto/opcodes.hpp -@@ -37,11 +37,13 @@ enum Opcodes { - macro(RegF) // Machine float register - macro(RegD) // Machine double register - macro(RegL) // Machine long register -+ macro(VecA) // Machine vectora register - macro(VecS) // Machine vectors register - macro(VecD) // Machine vectord register - macro(VecX) // Machine vectorx register - macro(VecY) // Machine vectory register - macro(VecZ) // Machine vectorz register -+ macro(RegVMask) // Vector mask/predicate register - macro(RegFlags) // Machine flags register - _last_machine_leaf, // Split between regular opcodes and machine - #include "classes.hpp" -diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp -index 397a53713..89c7fc7c8 100644 ---- a/src/hotspot/share/opto/phase.cpp -+++ b/src/hotspot/share/opto/phase.cpp -@@ -113,6 +113,7 @@ void Phase::print_timers() { - tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds()); - tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds()); - tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds()); -+ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds()); - tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds()); - tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds()); - tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds()); -@@ -130,6 +131,7 @@ void Phase::print_timers() { - timers[_t_regAllocSplit].seconds() + - timers[_t_postAllocCopyRemoval].seconds() + - timers[_t_mergeMultidefs].seconds() + -+ timers[_t_mergeDebugdefs].seconds() + - timers[_t_fixupSpills].seconds() + - timers[_t_chaitinCompact].seconds() + - timers[_t_chaitinCoalesce1].seconds() + -diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp -index 4b0c53ffc..b3302ec86 100644 ---- a/src/hotspot/share/opto/phase.hpp -+++ b/src/hotspot/share/opto/phase.hpp -@@ -91,6 +91,7 @@ public: - _t_regAllocSplit, - _t_postAllocCopyRemoval, - _t_mergeMultidefs, -+ _t_mergeDebugdefs, - _t_fixupSpills, - _t_chaitinCompact, - _t_chaitinCoalesce1, -diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp -index 46766b604..3f608bb40 100644 ---- a/src/hotspot/share/opto/postaloc.cpp -+++ b/src/hotspot/share/opto/postaloc.cpp -@@ -27,6 +27,7 @@ - #include "memory/resourceArea.hpp" - #include "opto/chaitin.hpp" - #include "opto/machnode.hpp" -+#include "opto/addnode.hpp" - - // See if this register (or pairs, or vector) already contains the value. - static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, -@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - Node *val = skip_copies(n->in(k)); - if (val == x) return blk_adjust; // No progress? 
- -- int n_regs = RegMask::num_registers(val->ideal_reg()); - uint val_idx = _lrg_map.live_range_id(val); - OptoReg::Name val_reg = lrgs(val_idx).reg(); -+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); - - // See if it happens to already be in the correct register! - // (either Phi's direct register, or the common case of the name -@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - } - - Node *vv = value[reg]; -+ // For scalable register, number of registers may be inconsistent between -+ // "val_reg" and "reg". For example, when "val" resides in register -+ // but "reg" is located in stack. -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ if (OptoReg::is_stack(reg)) { -+ n_regs = lrgs(val_idx).scalable_reg_slots(); -+ } else { -+ n_regs = RegMask::SlotsPerVecA; -+ } -+ } - if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set -- uint last = (n_regs-1); // Looking for the last part of a set -+ uint last; -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned -+ last = RegMask::SlotsPerVecA - 1; -+ } else { -+ last = (n_regs-1); // Looking for the last part of a set -+ } - if ((reg&last) != last) continue; // Wrong part of a set - if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value - } -@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() { - } - } - -+void PhaseChaitin::merge_debugdefs() { -+ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]); -+ -+ ResourceMark rm; -+ for (uint i = 0; i < _cfg.number_of_blocks(); i++) { -+ Block* block = _cfg.get_block(i); -+ for (int j = 0; j < (int) block->number_of_nodes(); j++) { -+ Node* base = block->get_node(j); -+ if (base && base->is_Mach() && base->outcnt() == 1) { -+ Node* addp = base->unique_out(); -+ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) { -+ Node* derived = addp->in(AddPNode::Address); -+ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) { -+ base->subsume_by(derived, Compile::current()); -+ block->remove_node(j--); -+ } -+ } -+ } -+ } -+ } -+} -+ - int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { - int blk_adjust = 0; - -@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - uint k; - Node *phi = block->get_node(j); - uint pidx = _lrg_map.live_range_id(phi); -- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); -+ OptoReg::Name preg = lrgs(pidx).reg(); - - // Remove copies remaining on edges. Check for junk phi. 
- Node *u = NULL; -@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - if( pidx ) { - value.map(preg,phi); - regnd.map(preg,phi); -- int n_regs = RegMask::num_registers(phi->ideal_reg()); -+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name preg_lo = OptoReg::add(preg,-l); - value.map(preg_lo,phi); -@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - regnd.map(ureg, def); - // Record other half of doubles - uint def_ideal_reg = def->ideal_reg(); -- int n_regs = RegMask::num_registers(def_ideal_reg); -+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); - if (!value[ureg_lo] && -@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - } - - uint n_ideal_reg = n->ideal_reg(); -- int n_regs = RegMask::num_registers(n_ideal_reg); -+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); - if (n_regs == 1) { - // If Node 'n' does not change the value mapped by the register, - // then 'n' is a useless copy. Do not update the register->node -diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp -index 2e04c42eb..34a701e84 100644 ---- a/src/hotspot/share/opto/regmask.cpp -+++ b/src/hotspot/share/opto/regmask.cpp -@@ -24,6 +24,7 @@ - - #include "precompiled.hpp" - #include "opto/ad.hpp" -+#include "opto/chaitin.hpp" - #include "opto/compile.hpp" - #include "opto/matcher.hpp" - #include "opto/node.hpp" -@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( - - //============================================================================= - bool RegMask::is_vector(uint ireg) { -- return (ireg == Op_VecS || ireg == Op_VecD || -+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || - ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); - } - - int RegMask::num_registers(uint ireg) { - switch(ireg) { - case Op_VecZ: -- return 16; -+ return SlotsPerVecZ; - case Op_VecY: -- return 8; -+ return SlotsPerVecY; - case Op_VecX: -- return 4; -+ return SlotsPerVecX; - case Op_VecD: -+ return SlotsPerVecD; - case Op_RegD: - case Op_RegL: - #ifdef _LP64 - case Op_RegP: - #endif - return 2; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ return SlotsPerVecA; - } - // Op_VecS and the rest ideal registers. - return 1; - } - -+int RegMask::num_registers(uint ireg, LRG &lrg) { -+ int n_regs = num_registers(ireg); -+ -+ // assigned is OptoReg which is selected by register allocator -+ OptoReg::Name assigned = lrg.reg(); -+ assert(OptoReg::is_valid(assigned), "should be valid opto register"); -+ -+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { -+ n_regs = lrg.scalable_reg_slots(); -+ } -+ return n_regs; -+} -+ - //------------------------------find_first_pair-------------------------------- - // Find the lowest-numbered register pair in the mask. Return the - // HIGHEST register number in the pair, or BAD if no pairs. -@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { - return true; - } - -+// Check that whether given reg number with size is valid -+// for current regmask, where reg is the highest number. 
-+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { -+ for (int i = 0; i < size; i++) { -+ if (!Member(reg - i)) { -+ return false; -+ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } -+ return true; +} ++#endif // COMPILER2 + - // only indicies of power 2 are accessed, so index 3 is only filled in for storage. - static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; - //------------------------------find_first_set--------------------------------- - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Works also for size 1. --OptoReg::Name RegMask::find_first_set(const int size) const { -- verify_sets(size); -+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { -+ if (lrg.is_scalable()) { -+ // For scalable vector register, regmask is SlotsPerVecA bits aligned. -+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); -+ } else { -+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); -+ } - for (int i = 0; i < RM_SIZE; i++) { - if (_A[i]) { // Found some bits - int bit = _A[i] & -_A[i]; // Extract low bit -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d08795..2688275be 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -28,6 +28,8 @@ - #include "code/vmreg.hpp" - #include "opto/optoreg.hpp" - -+class LRG; -+ - // Some fun naming (textual) substitutions: - // - // RegMask::get_low_elem() ==> RegMask::find_first_elem() -@@ -95,6 +97,7 @@ public: - // requirement is internal to the allocator, and independent of any - // particular platform. - enum { SlotsPerLong = 2, -+ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), - SlotsPerVecS = 1, - SlotsPerVecD = 2, - SlotsPerVecX = 4, -@@ -204,10 +207,14 @@ public: - return false; - } - -+ // Check that whether given reg number with size is valid -+ // for current regmask, where reg is the highest number. -+ bool is_valid_reg(OptoReg::Name reg, const int size) const; -+ - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Assert that the mask contains only bit sets. -- OptoReg::Name find_first_set(const int size) const; -+ OptoReg::Name find_first_set(LRG &lrg, const int size) const; - - // Clear out partial bits; leave only aligned adjacent bit sets of size. - void clear_to_sets(const int size); -@@ -226,6 +233,7 @@ public: - - static bool is_vector(uint ireg); - static int num_registers(uint ireg); -+ static int num_registers(uint ireg, LRG &lrg); - - // Fast overlap test. Non-zero if any registers in common. - int overlap( const RegMask &rm ) const { -diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index fed52e488..ee583236f 100644 ---- a/src/hotspot/share/opto/superword.cpp -+++ b/src/hotspot/share/opto/superword.cpp -@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz - //------------------------------transform_loop--------------------------- - void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { - assert(UseSuperWord, "should be"); -- // Do vectors exist on this architecture? -- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; -+ // SuperWord only works with power of two vector sizes. 
-+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); -+ if (vector_width < 2 || !is_power_of_2(vector_width)) { ++void VM_Version::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { + return; + } - - assert(lpt->_head->is_CountedLoop(), "must be"); - CountedLoopNode *cl = lpt->_head->as_CountedLoop(); -diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 7d767c47c..c9948df5f 100644 ---- a/src/hotspot/share/opto/type.cpp -+++ b/src/hotspot/share/opto/type.cpp -@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { - { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY - { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ - #else // all other -+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA - { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS - { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD - { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX -@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { - // get_zero_type() should not happen for T_CONFLICT - _zero_type[T_CONFLICT]= NULL; - -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); -+ } -+ - // Vector predefined types, it needs initialized _const_basic_type[]. - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE,4); -@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) { - if (Matcher::vector_size_supported(T_FLOAT,16)) { - TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); - } -+ mreg2type[Op_VecA] = TypeVect::VECTA; - mreg2type[Op_VecS] = TypeVect::VECTS; - mreg2type[Op_VecD] = TypeVect::VECTD; - mreg2type[Op_VecX] = TypeVect::VECTX; -@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { - - Bad, // Tuple - handled in v-call - Bad, // Array - handled in v-call -+ Bad, // VectorA - handled in v-call - Bad, // VectorS - handled in v-call - Bad, // VectorD - handled in v-call - Bad, // VectorX - handled in v-call -@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const { - - //==============================TypeVect======================================= - // Convenience common pre-built types. 
-+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic - const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors - const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors - const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors -@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors - const TypeVect* TypeVect::make(const Type *elem, uint length) { - BasicType elem_bt = elem->array_element_basic_type(); - assert(is_java_primitive(elem_bt), "only primitive types in vector"); -- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); - assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); - int size = length * type2aelembytes(elem_bt); - switch (Matcher::vector_ideal_reg(size)) { -+ case Op_VecA: -+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); - case Op_VecS: - return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); - case Op_RegL: -@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { - default: // All else is a mistake - typerr(t); - -+ case VectorA: - case VectorS: - case VectorD: - case VectorX: -@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const { - #ifndef PRODUCT - void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { - switch (base()) { -+ case VectorA: -+ st->print("vectora["); break; - case VectorS: - st->print("vectors["); break; - case VectorD: -diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp -index 27d042d94..82ee2dfcb 100644 ---- a/src/hotspot/share/opto/type.hpp -+++ b/src/hotspot/share/opto/type.hpp -@@ -53,6 +53,7 @@ class TypeNarrowKlass; - class TypeAry; - class TypeTuple; - class TypeVect; -+class TypeVectA; - class TypeVectS; - class TypeVectD; - class TypeVectX; -@@ -87,6 +88,7 @@ public: - - Tuple, // Method signature or object layout - Array, // Array types -+ VectorA, // (Scalable) Vector types for vector length agnostic - VectorS, // 32bit Vector types - VectorD, // 64bit Vector types - VectorX, // 128bit Vector types -@@ -769,6 +771,7 @@ public: - virtual const Type *xmeet( const Type *t) const; - virtual const Type *xdual() const; // Compute dual right now. - -+ static const TypeVect *VECTA; - static const TypeVect *VECTS; - static const TypeVect *VECTD; - static const TypeVect *VECTX; -@@ -780,6 +783,11 @@ public: - #endif - }; - -+class TypeVectA : public TypeVect { -+ friend class TypeVect; -+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} -+}; -+ - class TypeVectS : public TypeVect { - friend class TypeVect; - TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} -@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const { - } - - inline const TypeVect *Type::is_vect() const { -- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); -+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); - return (TypeVect*)this; - } - - inline const TypeVect *Type::isa_vect() const { -- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; -+ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; - } - - inline const TypePtr *Type::is_ptr() const { -diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index de22591ba..b82d631f4 100644 ---- a/src/hotspot/share/opto/vectornode.cpp -+++ b/src/hotspot/share/opto/vectornode.cpp -@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = VectorNode::opcode(opc, bt); -- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); -+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = ReductionNode::opcode(opc, bt); -- return vopc != opc && Matcher::match_rule_supported(vopc); -+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2b..ee769634f 100644 ---- a/src/hotspot/share/runtime/abstract_vm_version.cpp -+++ b/src/hotspot/share/runtime/abstract_vm_version.cpp -@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; - #ifdef ZERO - #define VMTYPE "Zero" - #else // ZERO -- #define VMTYPE COMPILER1_PRESENT("Client") \ -- COMPILER2_PRESENT("Server") -+ #ifdef COMPILER2 -+ #define VMTYPE "Server" -+ #elif defined(COMPILER1) -+ #define VMTYPE "Client" -+ #else -+ #define VMTYPE "Core" -+ #endif // COMPILER2 - #endif // ZERO - #endif // TIERED - #endif -@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() { - IA32_ONLY("x86") \ - IA64_ONLY("ia64") \ - S390_ONLY("s390") \ -- SPARC_ONLY("sparc") -+ SPARC_ONLY("sparc") \ -+ RISCV64_ONLY("riscv64") - #endif // !ZERO - #endif // !CPU - -diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index 0a9c45f85..a96c2dd81 100644 ---- a/src/hotspot/share/runtime/thread.hpp -+++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { - address last_Java_pc(void) { return _anchor.last_Java_pc(); } - - // Safepoint support --#if !(defined(PPC64) || defined(AARCH64)) -+#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) - JavaThreadState thread_state() const { return _thread_state; } - void set_thread_state(JavaThreadState s) { - assert(current_or_null() == NULL || current_or_null() == this, -diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f7..aa71d7655 100644 ---- a/src/hotspot/share/runtime/thread.inline.hpp -+++ b/src/hotspot/share/runtime/thread.inline.hpp -@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { - set_has_async_exception(); - } - --#if defined(PPC64) || defined (AARCH64) -+#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) - inline JavaThreadState JavaThread::thread_state() const { - return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); - } -diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp -index 0b898dcc3..7f76486ae 100644 ---- a/src/hotspot/share/utilities/debug.cpp -+++ b/src/hotspot/share/utilities/debug.cpp -@@ -632,6 +632,7 @@ void help() { - tty->print_cr(" pns($sp, $rbp, $pc) 
on Linux/amd64 and Solaris/amd64 or"); - tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); - tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); -+ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or"); - tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); - tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); - tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); -diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf8025386..e8ab3097a 100644 ---- a/src/hotspot/share/utilities/macros.hpp -+++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ - - #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) - -+#if defined(RISCV32) || defined(RISCV64) -+#define RISCV -+#define RISCV_ONLY(code) code -+#define NOT_RISCV(code) -+#else -+#undef RISCV -+#define RISCV_ONLY(code) -+#define NOT_RISCV(code) code -+#endif -+ -+#ifdef RISCV32 -+#define RISCV32_ONLY(code) code -+#define NOT_RISCV32(code) -+#else -+#define RISCV32_ONLY(code) -+#define NOT_RISCV32(code) code -+#endif -+ -+#ifdef RISCV64 -+#define RISCV64_ONLY(code) code -+#define NOT_RISCV64(code) -+#else -+#define RISCV64_ONLY(code) -+#define NOT_RISCV64(code) code -+#endif -+ - #ifdef VM_LITTLE_ENDIAN - #define LITTLE_ENDIAN_ONLY(code) code - #define BIG_ENDIAN_ONLY(code) -diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java -index 063a5ef3a..50e9cdb57 100644 ---- a/src/java.base/share/classes/java/lang/StringLatin1.java -+++ b/src/java.base/share/classes/java/lang/StringLatin1.java -@@ -209,6 +209,11 @@ final class StringLatin1 { - // Note: fromIndex might be near -1>>>1. - return -1; - } -+ return indexOfChar(value, ch, fromIndex, max); -+ } -+ -+ @HotSpotIntrinsicCandidate -+ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { - byte c = (byte)ch; - for (int i = fromIndex; i < max; i++) { - if (value[i] == c) { -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c..55a7b96f7 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -58,6 +58,10 @@ - #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" - #endif - -+#ifdef riscv64 -+#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" -+#endif -+ - static jfieldID p_ps_prochandle_ID = 0; - static jfieldID threadList_ID = 0; - static jfieldID loadObjectList_ID = 0; -@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - return (err == PS_OK)? 
array : 0; - } - --#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) -+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) - JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 - (JNIEnv *env, jobject this_obj, jint lwp_id) { - -@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - #ifdef aarch64 - #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG - #endif -+#ifdef riscv64 -+#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG -+#endif - #if defined(sparc) || defined(sparcv9) - #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG - #endif -@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - } - #endif /* aarch64 */ - -+#if defined(riscv64) -+ -+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg -+ -+ { -+ regs[REG_INDEX(PC)] = gregs.pc; -+ regs[REG_INDEX(LR)] = gregs.ra; -+ regs[REG_INDEX(SP)] = gregs.sp; -+ regs[REG_INDEX(R3)] = gregs.gp; -+ regs[REG_INDEX(R4)] = gregs.tp; -+ regs[REG_INDEX(R5)] = gregs.t0; -+ regs[REG_INDEX(R6)] = gregs.t1; -+ regs[REG_INDEX(R7)] = gregs.t2; -+ regs[REG_INDEX(R8)] = gregs.s0; -+ regs[REG_INDEX(R9)] = gregs.s1; -+ regs[REG_INDEX(R10)] = gregs.a0; -+ regs[REG_INDEX(R11)] = gregs.a1; -+ regs[REG_INDEX(R12)] = gregs.a2; -+ regs[REG_INDEX(R13)] = gregs.a3; -+ regs[REG_INDEX(R14)] = gregs.a4; -+ regs[REG_INDEX(R15)] = gregs.a5; -+ regs[REG_INDEX(R16)] = gregs.a6; -+ regs[REG_INDEX(R17)] = gregs.a7; -+ regs[REG_INDEX(R18)] = gregs.s2; -+ regs[REG_INDEX(R19)] = gregs.s3; -+ regs[REG_INDEX(R20)] = gregs.s4; -+ regs[REG_INDEX(R21)] = gregs.s5; -+ regs[REG_INDEX(R22)] = gregs.s6; -+ regs[REG_INDEX(R23)] = gregs.s7; -+ regs[REG_INDEX(R24)] = gregs.s8; -+ regs[REG_INDEX(R25)] = gregs.s9; -+ regs[REG_INDEX(R26)] = gregs.s10; -+ regs[REG_INDEX(R27)] = gregs.s11; -+ regs[REG_INDEX(R28)] = gregs.t3; -+ regs[REG_INDEX(R29)] = gregs.t4; -+ regs[REG_INDEX(R30)] = gregs.t5; -+ regs[REG_INDEX(R31)] = gregs.t6; -+ } -+#endif /* riscv64 */ + - #if defined(ppc64) || defined(ppc64le) - #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e02..9d7fda8a6 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -@@ -43,6 +43,8 @@ - #elif defined(arm) - #include - #define user_regs_struct pt_regs -+#elif defined(riscv64) -+#include - #endif - - // This C bool type must be int for compatibility with Linux calls and -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d85..12eafc455 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr) - #endif - -+// riscv kernel didn't implement compat_arch_ptrace function that will handle PT_GETREGS case -+// like other platforms, 
so call ptrace with PTRACE_GETREGSET here. -+#ifndef riscv64 - #if defined(_LP64) && defined(PTRACE_GETREGS64) - #define PTRACE_GETREGS_REQ PTRACE_GETREGS64 - #elif defined(PTRACE_GETREGS) -@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #elif defined(PT_GETREGS) - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif -+#endif - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c..82c083055 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -1,6 +1,7 @@ - /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; - import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; - import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; - import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; -+import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; - import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; - import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; - import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -592,6 +594,8 @@ public class HotSpotAgent { - machDesc = new MachineDescriptionPPC64(); - } else if (cpu.equals("aarch64")) { - machDesc = new MachineDescriptionAArch64(); -+ } else if (cpu.equals("riscv64")) { -+ machDesc = new MachineDescriptionRISCV64(); - } else if (cpu.equals("sparc")) { - if (LinuxDebuggerLocal.getAddressSize()==8) { - machDesc = new MachineDescriptionSPARC64Bit(); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 000000000..4221937f1 +index 00000000000..8e35530359a --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java -@@ -0,0 +1,40 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -0,0 +1,72 @@ +/* -+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -58178,163 +56134,61 @@ index 000000000..4221937f1 + * + */ + -+package sun.jvm.hotspot.debugger; ++#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_RISCV_HPP + -+public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { -+ public long getAddressSize() { -+ return 8; -+ } ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" + -+ public boolean isLP64() { -+ return true; -+ } -+ -+ public boolean isBigEndian() { -+ return false; -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb71..acd5844ca 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.linux.x86.*; -@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; - import sun.jvm.hotspot.debugger.linux.sparc.*; - import sun.jvm.hotspot.debugger.linux.ppc64.*; - import sun.jvm.hotspot.debugger.linux.aarch64.*; -+import sun.jvm.hotspot.debugger.linux.riscv64.*; - import sun.jvm.hotspot.utilities.*; - - class LinuxCDebugger implements CDebugger { -@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { - Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); - if (pc == null) return null; - return new LinuxAARCH64CFrame(dbg, fp, pc); -- } else { -+ } else if (cpu.equals("riscv64")) { -+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); -+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); -+ if (fp == null) return null; -+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); -+ if (pc == null) return null; -+ return new LinuxRISCV64CFrame(dbg, fp, pc); -+ } else { - // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu - ThreadContext context = (ThreadContext) thread.getContext(); - return context.getTopFrame(dbg); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java -new file mode 100644 -index 000000000..eaef586b4 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.linux.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.linux.*; -+import sun.jvm.hotspot.debugger.cdbg.*; -+import sun.jvm.hotspot.debugger.cdbg.basic.*; -+ -+public final class LinuxRISCV64CFrame extends BasicCFrame { -+ private static final int C_FRAME_LINK_OFFSET = -2; -+ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; -+ -+ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { -+ super(dbg.getCDebugger()); -+ this.fp = fp; -+ this.pc = pc; -+ this.dbg = dbg; -+ } -+ -+ // override base class impl to avoid ELF parsing -+ public ClosestSymbol closestSymbolToPC() { -+ // try native lookup in debugger. -+ return dbg.lookup(dbg.getAddressValue(pc())); -+ } ++class VM_Version : public Abstract_VM_Version { ++#ifdef COMPILER2 ++private: ++ static void c2_initialize(); ++#endif // COMPILER2 + -+ public Address pc() { -+ return pc; -+ } ++protected: ++ static const char* _uarch; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); + -+ public Address localVariableBase() { -+ return fp; -+ } ++public: ++ // Initialization ++ static void initialize(); + -+ public CFrame sender(ThreadProxy thread) { -+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); -+ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ constexpr static bool supports_stack_watermark_barrier() { return true; } + -+ if ((fp == null) || fp.lessThan(rsp)) { -+ return null; -+ } ++ enum Feature_Flag { ++#define CPU_FEATURE_FLAGS(decl) \ ++ decl(I, "i", 8) \ ++ decl(M, "m", 12) \ ++ decl(A, "a", 0) \ ++ decl(F, "f", 5) \ ++ decl(D, "d", 3) \ ++ decl(C, "c", 2) \ ++ decl(V, "v", 21) \ ++ decl(B, "b", 1) + -+ // Check alignment of fp -+ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { -+ return null; -+ } ++#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), ++ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) ++#undef DECLARE_CPU_FEATURE_FLAG ++ }; + -+ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); -+ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { -+ return null; -+ } -+ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); -+ if (nextPC == null) { -+ return null; -+ } -+ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); -+ } ++ static void initialize_cpu_information(void); ++}; + -+ // package/class internals only -+ private static final int ADDRESS_SIZE = 8; -+ private Address pc; -+ private Address sp; -+ private Address fp; -+ private LinuxDebugger dbg; -+} -diff --git 
a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java ++#endif // CPU_RISCV_VM_VERSION_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 000000000..4789e664c +index 00000000000..aa7222dc64a --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58357,68 +56211,54 @@ index 000000000..4789e664c + * + */ + -+package sun.jvm.hotspot.debugger.linux.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.linux.*; ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" + -+public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { -+ private LinuxDebugger debugger; ++void VMRegImpl::set_regName() { ++ int i = 0; ++ Register reg = ::as_Register(0); ++ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } + -+ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { -+ super(); -+ this.debugger = debugger; ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ freg = freg->successor(); + } + -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); ++ VectorRegister vreg = ::as_VectorRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { ++ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ vreg = vreg->successor(); + } + -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { ++ regName[i] = "NON-GPR-FPR-VPR"; + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -index 74e957d94..1f44d75ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*; - import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.proc.amd64.*; - import sun.jvm.hotspot.debugger.proc.aarch64.*; -+import sun.jvm.hotspot.debugger.proc.riscv64.*; - import sun.jvm.hotspot.debugger.proc.sparc.*; - import sun.jvm.hotspot.debugger.proc.ppc64.*; - import 
sun.jvm.hotspot.debugger.proc.x86.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.utilities.*; -@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { - threadFactory = new ProcAARCH64ThreadFactory(this); - pcRegIndex = AARCH64ThreadContext.PC; - fpRegIndex = AARCH64ThreadContext.FP; -+ } else if (cpu.equals("riscv64")) { -+ threadFactory = new ProcRISCV64ThreadFactory(this); -+ pcRegIndex = RISCV64ThreadContext.PC; -+ fpRegIndex = RISCV64ThreadContext.FP; - } else if (cpu.equals("ppc64")) { - threadFactory = new ProcPPC64ThreadFactory(this); - pcRegIndex = PPC64ThreadContext.PC; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 000000000..c1cf1fb0f +index 00000000000..9e611b1f671 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java -@@ -0,0 +1,88 @@ ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58441,132 +56281,58 @@ index 000000000..c1cf1fb0f + * + */ + -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.proc.*; -+import sun.jvm.hotspot.utilities.*; -+ -+public class ProcRISCV64Thread implements ThreadProxy { -+ private ProcDebugger debugger; -+ private int id; -+ -+ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { -+ this.debugger = debugger; -+ -+ // FIXME: the size here should be configurable. However, making it -+ // so would produce a dependency on the "types" package from the -+ // debugger package, which is not desired. 
-+ this.id = (int) addr.getCIntegerAt(0, 4, true); -+ } -+ -+ public ProcRISCV64Thread(ProcDebugger debugger, long id) { -+ this.debugger = debugger; -+ this.id = (int) id; -+ } -+ -+ public ThreadContext getContext() throws IllegalThreadStateException { -+ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); -+ long[] regs = debugger.getThreadIntegerRegisterSet(id); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); -+ } -+ for (int i = 0; i < regs.length; i++) { -+ context.setRegister(i, regs[i]); -+ } -+ return context; -+ } -+ -+ public boolean canSetContext() throws DebuggerException { -+ return false; -+ } -+ -+ public void setContext(ThreadContext context) -+ throws IllegalThreadStateException, DebuggerException { -+ throw new DebuggerException("Unimplemented"); -+ } -+ -+ public String toString() { -+ return "t@" + id; -+ } -+ -+ public boolean equals(Object obj) { -+ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { -+ return false; -+ } -+ -+ return (((ProcRISCV64Thread) obj).id == id); -+ } ++#ifndef CPU_RISCV_VMREG_RISCV_HPP ++#define CPU_RISCV_VMREG_RISCV_HPP + -+ public int hashCode() { -+ return id; -+ } ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java -new file mode 100644 -index 000000000..498fa0dc6 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+package sun.jvm.hotspot.debugger.proc.riscv64; ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.proc.*; ++inline bool is_VectorRegister() { ++ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; ++} + -+public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { -+ private ProcDebugger debugger; ++inline Register as_Register() { ++ assert(is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} + -+ public ProcRISCV64ThreadContext(ProcDebugger debugger) { -+ super(); -+ this.debugger = debugger; -+ } ++inline FloatRegister as_FloatRegister() { ++ assert(is_FloatRegister() && is_even(value()), "must be"); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} + -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } ++inline VectorRegister as_VectorRegister() { ++ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); ++ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / ++ VectorRegisterImpl::max_slots_per_register); ++} + -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); -+ } ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_VectorRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_fpr; ++ return (base % VectorRegisterImpl::max_slots_per_register) == 0; ++ } else { ++ return is_even(value()); ++ } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java ++ ++#endif // CPU_RISCV_VMREG_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 000000000..81afd8fdc +index 00000000000..06b70020b4b --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp @@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58589,36 +56355,37 @@ index 000000000..81afd8fdc + * + */ + -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.proc.*; -+ -+public class ProcRISCV64ThreadFactory implements ProcThreadFactory { -+ private ProcDebugger debugger; ++#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + -+ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { -+ this.debugger = debugger; -+ } ++inline VMReg RegisterImpl::as_VMReg() const { ++ if (this == noreg) { ++ return VMRegImpl::Bad(); ++ } ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} + -+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { -+ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); -+ } ++inline VMReg FloatRegisterImpl::as_VMReg() const { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} + -+ public ThreadProxy createThreadWrapper(long id) { -+ return new ProcRISCV64Thread(debugger, id); -+ } ++inline VMReg VectorRegisterImpl::as_VMReg() const { ++ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_fpr); +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java ++ ++#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 000000000..ab92e3e74 +index 00000000000..78b81138003 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java -@@ -0,0 +1,55 @@ ++++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +@@ -0,0 +1,260 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58641,99 +56408,310 @@ index 000000000..ab92e3e74 + * + */ + -+package sun.jvm.hotspot.debugger.remote.riscv64; ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "assembler_riscv.inline.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_riscv.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/instanceKlass.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.remote.*; -+import sun.jvm.hotspot.utilities.*; ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code + -+public class RemoteRISCV64Thread extends RemoteThread { -+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { -+ super(debugger, addr); -+ } ++#define __ masm-> + -+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { -+ super(debugger, id); -+ } ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif + -+ public ThreadContext getContext() throws IllegalThreadStateException { -+ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); -+ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : -+ debugger.getThreadIntegerRegisterSet(id); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); -+ } -+ for (int i = 0; i < regs.length; i++) { -+ context.setRegister(i, regs[i]); -+ } -+ return context; ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; + } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java -new file mode 100644 -index 000000000..1e8cd19b2 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.remote.riscv64; + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.remote.*; ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; + -+public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { -+ private RemoteDebuggerClient debugger; ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); + -+ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { -+ super(); -+ this.debugger = debugger; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ add_memory_int64(Address(t2), 1); + } ++#endif + -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t2, j_rarg0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ start_pc = __ pc(); ++ ++ // check offset vs vtable length ++ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); ++ __ mvw(t1, vtable_index * vtableEntry::size()); ++ __ bgt(t0, t1, L); ++ __ enter(); ++ __ mv(x12, vtable_index); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); ++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ ++ __ leave(); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ start_pc = __ pc(); ++ __ lookup_virtual_method(t2, vtable_index, xmethod); ++ // lookup_virtual_method generates ++ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld ++ // 1 instruction (best case):ld * 1 ++ slop_delta = 16 - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beqz(xmethod, L); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); + } ++#endif // PRODUCT ++ ++ // x10: receiver klass ++ // xmethod: Method* ++ // x12: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, 
Method::from_compiled_offset())); ++ __ jr(t0); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); ++ ++ return s; +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java ++ ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ add_memory_int64(Address(x18), 1); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ ++ // Entry arguments: ++ // t2: CompiledICHolder ++ // j_rarg0: Receiver ++ ++ // This stub is called from compiled code which has no callee-saved registers, ++ // so all registers except arguments are free at this point. ++ const Register recv_klass_reg = x18; ++ const Register holder_klass_reg = x19; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) ++ const Register temp_reg = x28; ++ const Register temp_reg2 = x29; ++ const Register icholder_reg = t1; ++ ++ Label L_no_such_interface; ++ ++ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); ++ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); ++ ++ start_pc = __ pc(); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(recv_klass_reg, j_rarg0); ++ ++ // Receiver subtype check against REFC. ++ __ lookup_interface_method(// inputs: rec. class, interface ++ recv_klass_reg, resolved_klass_reg, noreg, ++ // outputs: scan temp. reg1, scan temp. reg2 ++ temp_reg2, temp_reg, ++ L_no_such_interface, ++ /*return_method=*/false); ++ ++ const ptrdiff_t typecheckSize = __ pc() - start_pc; ++ start_pc = __ pc(); ++ ++ // Get selected method from declaring class and itable index ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ recv_klass_reg, holder_klass_reg, itable_index, ++ // outputs: method, scan temp. reg ++ xmethod, temp_reg, ++ L_no_such_interface); ++ ++ const ptrdiff_t lookupSize = __ pc() - start_pc; ++ ++ // Reduce "estimate" such that "padding" does not drop below 8. 
++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = typecheckSize + lookupSize; ++ slop_delta = (int)(estimate - codesize); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L2; ++ __ beqz(xmethod, L2); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L2); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L2); ++ } ++#endif // ASSERT ++ ++ // xmethod: Method* ++ // j_rarg0: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ jr(t0); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++int VtableStub::pd_code_alignment() { ++ // RISCV cache line size is not an architected constant. We just align on word size. ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209e2..ee298f56653 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2016, 2019, SAP SE. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2019 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op + } + + // result = condition ? opr1 : opr2 +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); ++ + Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; + switch (condition) { + case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4f7..82e9de5a06f 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + } + } + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); ++ + Assembler::Condition acond, ncond; + switch (condition) { + case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 3799adf5dd9..6f75e623a9a 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -2845,6 +2845,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { + strncpy(cpuinfo, "IA64", length); + #elif defined(PPC) + strncpy(cpuinfo, "PPC64", length); ++#elif defined(RISCV) ++ strncpy(cpuinfo, "RISCV64", length); + #elif defined(S390) + strncpy(cpuinfo, "S390", length); + #elif defined(SPARC) +diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp new file mode 100644 -index 000000000..eecb6e029 +index 00000000000..f2610af6cdd --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java -@@ -0,0 +1,46 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +@@ -0,0 +1,26 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58756,36 +56734,16 @@ index 000000000..eecb6e029 + * + */ + -+package sun.jvm.hotspot.debugger.remote.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.remote.*; -+ -+public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { -+ private RemoteDebuggerClient debugger; -+ -+ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { -+ this.debugger = debugger; -+ } -+ -+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { -+ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); -+ } -+ -+ public ThreadProxy createThreadWrapper(long id) { -+ return new RemoteRISCV64Thread(debugger, id); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java ++// nothing required here +diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 000000000..426ff0580 +index 00000000000..761da5d743e --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java -@@ -0,0 +1,172 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +@@ -0,0 +1,134 @@ +/* -+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58808,183 +56766,124 @@ index 000000000..426ff0580 + * + */ + -+package sun.jvm.hotspot.debugger.riscv64; -+ -+import java.lang.annotation.Native; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.cdbg.*; -+ -+/** Specifies the thread context on riscv64 platforms; only a sub-portion -+ * of the context is guaranteed to be present on all operating -+ * systems. */ -+ -+public abstract class RISCV64ThreadContext implements ThreadContext { -+ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. ++#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + -+ // /* -+ // * Signal context structure - contains all info to do with the state -+ // * before the signal handler was invoked. 
-+ // */ -+ // struct sigcontext { -+ // struct user_regs_struct sc_regs; -+ // union __riscv_fp_state sc_fpregs; -+ // }; -+ // -+ // struct user_regs_struct { -+ // unsigned long pc; -+ // unsigned long ra; -+ // unsigned long sp; -+ // unsigned long gp; -+ // unsigned long tp; -+ // unsigned long t0; -+ // unsigned long t1; -+ // unsigned long t2; -+ // unsigned long s0; -+ // unsigned long s1; -+ // unsigned long a0; -+ // unsigned long a1; -+ // unsigned long a2; -+ // unsigned long a3; -+ // unsigned long a4; -+ // unsigned long a5; -+ // unsigned long a6; -+ // unsigned long a7; -+ // unsigned long s2; -+ // unsigned long s3; -+ // unsigned long s4; -+ // unsigned long s5; -+ // unsigned long s6; -+ // unsigned long s7; -+ // unsigned long s8; -+ // unsigned long s9; -+ // unsigned long s10; -+ // unsigned long s11; -+ // unsigned long t3; -+ // unsigned long t4; -+ // unsigned long t5; -+ // unsigned long t6; -+ // }; ++#include "runtime/vm_version.hpp" + -+ // NOTE: the indices for the various registers must be maintained as -+ // listed across various operating systems. However, only a small -+ // subset of the registers' values are guaranteed to be present (and -+ // must be present for the SA's stack walking to work) ++// Implementation of class atomic + -+ // One instance of the Native annotation is enough to trigger header generation -+ // for this file. -+ @Native -+ public static final int R0 = 0; -+ public static final int R1 = 1; -+ public static final int R2 = 2; -+ public static final int R3 = 3; -+ public static final int R4 = 4; -+ public static final int R5 = 5; -+ public static final int R6 = 6; -+ public static final int R7 = 7; -+ public static final int R8 = 8; -+ public static final int R9 = 9; -+ public static final int R10 = 10; -+ public static final int R11 = 11; -+ public static final int R12 = 12; -+ public static final int R13 = 13; -+ public static final int R14 = 14; -+ public static final int R15 = 15; -+ public static final int R16 = 16; -+ public static final int R17 = 17; -+ public static final int R18 = 18; -+ public static final int R19 = 19; -+ public static final int R20 = 20; -+ public static final int R21 = 21; -+ public static final int R22 = 22; -+ public static final int R23 = 23; -+ public static final int R24 = 24; -+ public static final int R25 = 25; -+ public static final int R26 = 26; -+ public static final int R27 = 27; -+ public static final int R28 = 28; -+ public static final int R29 = 29; -+ public static final int R30 = 30; -+ public static final int R31 = 31; ++// Note that memory_order_conservative requires a full barrier after atomic stores. 
++// See https://patchwork.kernel.org/patch/3575821/
+
++template<size_t byte_size>
++struct Atomic::PlatformAdd {
++  template<typename D, typename I>
++  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
++    D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
++    FULL_MEM_BARRIER;
++    return res;
++  }
+
++  template<typename D, typename I>
++  D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
++    return add_and_fetch(dest, add_value, order) - add_value;
++  }
++};
+
++template<size_t byte_size>
++template<typename T>
++inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
++                                                     T exchange_value,
++                                                     atomic_memory_order order) const {
++  STATIC_ASSERT(byte_size == sizeof(T));
++  T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
++  FULL_MEM_BARRIER;
++  return res;
++}
+
++// __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
++template<size_t byte_size>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attribute__((unused)),
++                                                        T compare_value,
++                                                        T exchange_value,
++                                                        atomic_memory_order order) const {
++  STATIC_ASSERT(byte_size == sizeof(T));
++  T value = compare_value;
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
+
++  __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false,
++                            __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  return value;
++}
+
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)),
++                                                T compare_value,
++                                                T exchange_value,
++                                                atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  T rv;
++  int tmp;
++  __asm volatile(
++    "1:\n\t"
++    " addiw     %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend
++    " lr.w.aq   %[rv], (%[dest])\n\t"
++    " bne       %[rv], %[tmp], 2f\n\t"
++    " sc.w.rl   %[tmp], %[ev], (%[dest])\n\t"
++    " bnez      %[tmp], 1b\n\t"
++    "2:\n\t"
++    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
++    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
++    : "memory");
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  return rv;
++}
+
++template<size_t byte_size>
++struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
++{
++  template <typename T>
++  T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
++};
+
++template<size_t byte_size>
++struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X>
++{
++  template <typename T>
++  void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
++};
+
++template<size_t byte_size>
++struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
++{
++  template <typename T>
++ void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } ++}; + -+ /** This can't be implemented in this class since we would have to -+ * tie the implementation to, for example, the debugging system */ -+ public abstract Address getRegisterAsAddress(int index); -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785..74bd614d3 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; -+import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; - import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; -@@ -99,6 +100,8 @@ public class Threads { - access = new LinuxPPC64JavaThreadPDAccess(); - } else if (cpu.equals("aarch64")) { - access = new LinuxAARCH64JavaThreadPDAccess(); -+ } else if (cpu.equals("riscv64")) { -+ access = new LinuxRISCV64JavaThreadPDAccess(); - } else { - try { - access = (JavaThreadPDAccess) -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java ++#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 000000000..2df0837b6 +index 00000000000..28868c76406 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -0,0 +1,132 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +@@ -0,0 +1,45 @@ +/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -59007,122 +56906,120 @@ index 000000000..2df0837b6 + * + */ + -+package sun.jvm.hotspot.runtime.linux_riscv64; ++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + -+import java.io.*; -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.runtime.riscv64.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.utilities.*; ++#include + -+public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { -+ private static AddressField lastJavaFPField; -+ private static AddressField osThreadField; ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { ++ return bswap_16(x); ++} + -+ // Field from OSThread -+ private static CIntegerField osThreadThreadIDField; ++inline u4 Bytes::swap_u4(u4 x) { ++ return bswap_32(x); ++} + -+ // This is currently unneeded but is being kept in case we change -+ // the currentFrameGuess algorithm -+ private static final long GUESS_SCAN_RANGE = 128 * 1024; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaThread"); -+ osThreadField = type.getAddressField("_osthread"); -+ -+ Type anchorType = db.lookupType("JavaFrameAnchor"); -+ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); -+ -+ Type osThreadType = db.lookupType("OSThread"); -+ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); -+ } -+ -+ public Address getLastJavaFP(Address addr) { -+ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); -+ } -+ -+ public Address getLastJavaPC(Address addr) { -+ return null; -+ } ++inline u8 Bytes::swap_u8(u8 x) { ++ return bswap_64(x); ++} + -+ public Address getBaseOfStackPointer(Address addr) { -+ return null; -+ } ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +new file mode 100644 +index 00000000000..147cfdf3c10 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ public Frame getLastFramePD(JavaThread thread, Address addr) { -+ Address fp = thread.getLastJavaFP(); -+ if (fp == null) { -+ return null; // no information -+ } -+ return new RISCV64Frame(thread.getLastJavaSP(), fp); -+ } ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP + -+ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { -+ return new RISCV64RegisterMap(thread, updateMap); -+ } ++// Empty for build system + -+ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { -+ ThreadProxy t = getThreadProxy(addr); -+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); -+ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); -+ if (!guesser.run(GUESS_SCAN_RANGE)) { -+ return null; -+ } -+ if (guesser.getPC() == null) { -+ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); -+ } else { -+ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); -+ } -+ } ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +new file mode 100644 +index 00000000000..1aa58f27871 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ public void printThreadIDOn(Address addr, PrintStream tty) { -+ tty.print(getThreadProxy(addr)); -+ } ++#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP + -+ public void printInfoOn(Address threadAddr, PrintStream tty) { -+ tty.print("Thread id: "); -+ printThreadIDOn(threadAddr, tty); -+ } ++#include + -+ public Address getLastSP(Address addr) { -+ ThreadProxy t = getThreadProxy(addr); -+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); -+ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ } ++// ++// Support for building on older Linux systems ++// + -+ public ThreadProxy getThreadProxy(Address addr) { -+ // Addr is the address of the JavaThread. 
-+ // Fetch the OSThread (for now and for simplicity, not making a -+ // separate "OSThread" class in this package) -+ Address osThreadAddr = osThreadField.getValue(addr); -+ // Get the address of the _thread_id from the OSThread -+ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++#ifndef SYS_memfd_create ++#define SYS_memfd_create 279 ++#endif ++#ifndef SYS_fallocate ++#define SYS_fallocate 47 ++#endif + -+ JVMDebugger debugger = VM.getVM().getDebugger(); -+ return debugger.getThreadForIdentifierAddress(threadIdAddr); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java ++#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 000000000..a3bbf1ad1 +index 00000000000..297414bfcd5 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java -@@ -0,0 +1,223 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +@@ -0,0 +1,43 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59145,213 +57042,102 @@ index 000000000..a3bbf1ad1 + * + */ + -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.code.*; -+import sun.jvm.hotspot.interpreter.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.runtime.riscv64.*; -+ -+/**

Should be able to be used on all riscv64 platforms we support -+ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" -+ functionality. Input is an RISCV64ThreadContext; output is SP, FP, -+ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is -+ left to the caller, since we may need to subclass RISCV64Frame to -+ support signal handler frames on Unix platforms.

-+ -+

Algorithm is to walk up the stack within a given range (say, -+ 512K at most) looking for a plausible PC and SP for a Java frame, -+ also considering those coming in from the context. If we find a PC -+ that belongs to the VM (i.e., in generated code like the -+ interpreter or CodeCache) then we try to find an associated FP. -+ We repeat this until we either find a complete frame or run out of -+ stack to look at.

*/ -+ -+public class RISCV64CurrentFrameGuess { -+ private RISCV64ThreadContext context; -+ private JavaThread thread; -+ private Address spFound; -+ private Address fpFound; -+ private Address pcFound; ++#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP + -+ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") -+ != null; ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, -+ JavaThread thread) { -+ this.context = context; -+ this.thread = thread; -+ } ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); + -+ /** Returns false if not able to find a frame within a reasonable range. */ -+ public boolean run(long regionInBytesToSearch) { -+ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); -+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); -+ if (sp == null) { -+ // Bail out if no last java frame either -+ if (thread.getLastJavaSP() != null) { -+ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); -+ return true; -+ } -+ return false; -+ } -+ Address end = sp.addOffsetTo(regionInBytesToSearch); -+ VM vm = VM.getVM(); ++define_pd_global(intx, CompilerThreadStackSize, 2048); + -+ setValues(null, null, null); // Assume we're not going to find anything ++define_pd_global(uintx, JVMInvokeMethodSlack, 8192); + -+ if (vm.isJavaPCDbg(pc)) { -+ if (vm.isClientCompiler()) { -+ // If the topmost frame is a Java frame, we are (pretty much) -+ // guaranteed to have a viable FP. We should be more robust -+ // than this (we have the potential for losing entire threads' -+ // stack traces) but need to see how much work we really have -+ // to do here. Searching the stack for an (SP, FP) pair is -+ // hard since it's easy to misinterpret inter-frame stack -+ // pointers as base-of-frame pointers; we also don't know the -+ // sizes of C1 frames (not registered in the nmethod) so can't -+ // derive them from SP. ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx, HeapBaseMinAddress, 2 * G); + -+ setValues(sp, fp, pc); -+ return true; -+ } else { -+ if (vm.getInterpreter().contains(pc)) { -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + -+ sp + ", fp = " + fp + ", pc = " + pc); -+ } -+ setValues(sp, fp, pc); -+ return true; -+ } ++#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +new file mode 100644 +index 00000000000..1c33dc1e87f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // For the server compiler, FP is not guaranteed to be valid -+ // for compiled code. In addition, an earlier attempt at a -+ // non-searching algorithm (see below) failed because the -+ // stack pointer from the thread context was pointing -+ // (considerably) beyond the ostensible end of the stack, into -+ // garbage; walking from the topmost frame back caused a crash. -+ // -+ // This algorithm takes the current PC as a given and tries to -+ // find the correct corresponding SP by walking up the stack -+ // and repeatedly performing stackwalks (very inefficient). -+ // -+ // FIXME: there is something wrong with stackwalking across -+ // adapter frames...this is likely to be the root cause of the -+ // failure with the simpler algorithm below. ++#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP + -+ for (long offset = 0; -+ offset < regionInBytesToSearch; -+ offset += vm.getAddressSize()) { -+ try { -+ Address curSP = sp.addOffsetTo(offset); -+ Frame frame = new RISCV64Frame(curSP, null, pc); -+ RegisterMap map = thread.newRegisterMap(false); -+ while (frame != null) { -+ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { -+ // We were able to traverse all the way to the -+ // bottommost Java frame. -+ // This sp looks good. Keep it. -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); -+ } -+ setValues(curSP, null, pc); -+ return true; -+ } -+ frame = frame.sender(map); -+ } -+ } catch (Exception e) { -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); -+ } -+ // Bad SP. Try another. -+ } -+ } ++// Included in orderAccess.hpp header file. + -+ // Were not able to find a plausible SP to go with this PC. -+ // Bail out. -+ return false; -+ } -+ } else { -+ // If the current program counter was not known to us as a Java -+ // PC, we currently assume that we are in the run-time system -+ // and attempt to look to thread-local storage for saved SP and -+ // FP. Note that if these are null (because we were, in fact, -+ // in Java code, i.e., vtable stubs or similar, and the SA -+ // didn't have enough insight into the target VM to understand -+ // that) then we are going to lose the entire stack trace for -+ // the thread, which is sub-optimal. FIXME. ++#include "runtime/vm_version.hpp" + -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + -+ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); -+ } -+ if (thread.getLastJavaSP() == null) { -+ return false; // No known Java frames on stack -+ } ++// Implementation of class OrderAccess. 
+ -+ // The runtime has a nasty habit of not saving fp in the frame -+ // anchor, leaving us to grovel about in the stack to find a -+ // plausible address. Fortunately, this only happens in -+ // compiled code; there we always have a valid PC, and we always -+ // push LR and FP onto the stack as a pair, with FP at the lower -+ // address. -+ pc = thread.getLastJavaPC(); -+ fp = thread.getLastJavaFP(); -+ sp = thread.getLastJavaSP(); ++inline void OrderAccess::loadload() { acquire(); } ++inline void OrderAccess::storestore() { release(); } ++inline void OrderAccess::loadstore() { acquire(); } ++inline void OrderAccess::storeload() { fence(); } + -+ if (fp == null) { -+ CodeCache cc = vm.getCodeCache(); -+ if (cc.contains(pc)) { -+ CodeBlob cb = cc.findBlob(pc); -+ if (DEBUG) { -+ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); -+ } -+ // See if we can derive a frame pointer from SP and PC -+ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); -+ if (link_offset >= 0) { -+ fp = sp.addOffsetTo(link_offset); -+ } -+ } -+ } ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + -+ // We found a PC in the frame anchor. Check that it's plausible, and -+ // if it is, use it. -+ if (vm.isJavaPCDbg(pc)) { -+ setValues(sp, fp, pc); -+ } else { -+ setValues(sp, fp, null); -+ } ++inline void OrderAccess::acquire() { ++ READ_MEM_BARRIER; ++} + -+ return true; -+ } -+ } ++inline void OrderAccess::release() { ++ WRITE_MEM_BARRIER; ++} + -+ public Address getSP() { return spFound; } -+ public Address getFP() { return fpFound; } -+ /** May be null if getting values from thread-local storage; take -+ care to call the correct RISCV64Frame constructor to recover this if -+ necessary */ -+ public Address getPC() { return pcFound; } ++inline void OrderAccess::fence() { ++ FULL_MEM_BARRIER; ++} + -+ private void setValues(Address sp, Address fp, Address pc) { -+ spFound = sp; -+ fpFound = fp; -+ pcFound = pc; ++inline void OrderAccess::cross_modify_fence_impl() { ++ asm volatile("fence.i" : : : "memory"); ++ if (UseConservativeFence) { ++ asm volatile("fence ir, ir" : : : "memory"); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java ++ ++#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 000000000..c04def5a1 +index 00000000000..1f46bbab0a2 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -0,0 +1,554 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -0,0 +1,466 @@ +/* -+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -59374,544 +57160,23387 @@ index 000000000..c04def5a1 + * + */ + -+package sun.jvm.hotspot.runtime.riscv64; ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/codeCache.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm.h" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "signals_posix.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" + -+import java.util.*; -+import sun.jvm.hotspot.code.*; -+import sun.jvm.hotspot.compiler.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.oops.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.utilities.*; ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include + -+/** Specialization of and implementation of abstract methods of the -+ Frame class for the riscv64 family of CPUs. 
*/ ++#define REG_LR 1 ++#define REG_FP 8 + -+public class RISCV64Frame extends Frame { -+ private static final boolean DEBUG; -+ static { -+ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; -+ } ++NOINLINE address os::current_stack_pointer() { ++ return (address)__builtin_frame_address(0); ++} + -+ // Java frames -+ private static final int LINK_OFFSET = -2; -+ private static final int RETURN_ADDR_OFFSET = -1; -+ private static final int SENDER_SP_OFFSET = 0; ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ return (char*) -1; ++} + -+ // Interpreter frames -+ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; -+ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; -+ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; -+ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only -+ private static int INTERPRETER_FRAME_PADDING_OFFSET; -+ private static int INTERPRETER_FRAME_MIRROR_OFFSET; -+ private static int INTERPRETER_FRAME_CACHE_OFFSET; -+ private static int INTERPRETER_FRAME_LOCALS_OFFSET; -+ private static int INTERPRETER_FRAME_BCX_OFFSET; -+ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; -+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__gregs[REG_PC]; ++} + -+ // Entry frames -+ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; ++} + -+ // Native frames -+ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} + -+ private static VMReg fp = new VMReg(8); ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} + -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ address epc; ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; + -+ private static synchronized void initialize(TypeDataBase db) { -+ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; -+ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; -+ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; -+ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; -+ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; -+ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; -+ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; -+ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ } -+ -+ -+ // an additional field beyond sp and pc: -+ Address raw_fp; // frame pointer -+ private Address raw_unextendedSP; -+ -+ private RISCV64Frame() { -+ } -+ -+ private void adjustForDeopt() { -+ if ( pc != null) { -+ // Look for a deopt pc and if it is deopted 
convert to original pc -+ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); -+ if (cb != null && cb.isJavaMethod()) { -+ NMethod nm = (NMethod) cb; -+ if (pc.equals(nm.deoptHandlerBegin())) { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); -+ } -+ // adjust pc if frame is deoptimized. -+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); -+ deoptimized = true; -+ } -+ } ++ if (uc != NULL) { ++ epc = os::Posix::ucontext_get_pc(uc); ++ if (ret_sp != NULL) { ++ *ret_sp = os::Linux::ucontext_get_sp(uc); + } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_sp; -+ this.raw_fp = raw_fp; -+ this.pc = pc; -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, fp, pc): " + this); -+ dumpStack(); ++ if (ret_fp != NULL) { ++ *ret_fp = os::Linux::ucontext_get_fp(uc); + } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_fp) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_sp; -+ this.raw_fp = raw_fp; -+ -+ // We cannot assume SP[-1] always contains a valid return PC (e.g. if -+ // the callee is a C/C++ compiled frame). If the PC is not known to -+ // Java then this.pc is null. -+ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); -+ if (VM.getVM().isJavaPCDbg(savedPC)) { -+ this.pc = savedPC; ++ } else { ++ epc = NULL; ++ if (ret_sp != NULL) { ++ *ret_sp = (intptr_t *)NULL; + } -+ -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, fp): " + this); -+ dumpStack(); ++ if (ret_fp != NULL) { ++ *ret_fp = (intptr_t *)NULL; + } + } + -+ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_unextendedSp; -+ this.raw_fp = raw_fp; -+ this.pc = pc; -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); -+ dumpStack(); -+ } ++ return epc; ++} + -+ } ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); ++ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ return frame(frame_sp, frame_fp, frame_pc); ++} + -+ public Object clone() { -+ RISCV64Frame frame = new RISCV64Frame(); -+ frame.raw_sp = raw_sp; -+ frame.raw_unextendedSP = raw_unextendedSP; -+ frame.raw_fp = raw_fp; -+ frame.pc = pc; -+ frame.deoptimized = deoptimized; -+ return frame; -+ } ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* frame_sp = NULL; ++ intptr_t* frame_fp = NULL; ++ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc); ++} + -+ public boolean equals(Object arg) { -+ if (arg == null) { -+ return false; -+ } ++// By default, gcc always saves frame pointer rfp on this stack. This ++// may get turned off by -fomit-frame-pointer. 
++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} + -+ if (!(arg instanceof RISCV64Frame)) { -+ return false; ++NOINLINE frame os::current_frame() { ++ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); ++ if (sender_sp != NULL) { ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ sender_sp[frame::link_offset], ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); + } -+ -+ RISCV64Frame other = (RISCV64Frame) arg; -+ -+ return (AddressOps.equal(getSP(), other.getSP()) && -+ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && -+ AddressOps.equal(getFP(), other.getFP()) && -+ AddressOps.equal(getPC(), other.getPC())); ++ } else { ++ ShouldNotReachHere(); ++ return frame(); + } ++} + -+ public int hashCode() { -+ if (raw_sp == null) { -+ return 0; -+ } ++// Utility functions ++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { + -+ return raw_sp.hashCode(); -+ } ++ // decide if this trap can be handled by a stub ++ address stub = NULL; + -+ public String toString() { -+ return "sp: " + (getSP() == null? "null" : getSP().toString()) + -+ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + -+ ", fp: " + (getFP() == null? "null" : getFP().toString()) + -+ ", pc: " + (pc == null? "null" : pc.toString()); -+ } ++ address pc = NULL; + -+ // accessors for the instance variables -+ public Address getFP() { return raw_fp; } -+ public Address getSP() { return raw_sp; } -+ public Address getID() { return raw_sp; } ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Posix::ucontext_get_pc(uc); + -+ // FIXME: not implemented yet -+ public boolean isSignalHandlerFrameDbg() { return false; } -+ public int getSignalNumberDbg() { return 0; } -+ public String getSignalNameDbg() { return null; } ++ address addr = (address) info->si_addr; + -+ public boolean isInterpretedFrameValid() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ // Make sure the high order byte is sign extended, as it may be masked away by the hardware. ++ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { ++ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); + } + -+ // These are reasonable sanity checks -+ if (getFP() == null || getFP().andWithMask(0x3) != null) { -+ return false; ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ // check if fault address is within thread stack ++ if (thread->is_in_full_stack(addr)) { ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue ++ } ++ } + } + -+ if (getSP() == null || getSP().andWithMask(0x3) != null) { -+ return false; -+ } ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub + -+ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { -+ return false; ++ // Handle signal from NativeJump::patch_verified_entry(). 
++ if ((sig == SIGILL || sig == SIGTRAP) ++ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++ if (TraceTraps) { ++ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); ++ } ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ address next_pc = pc + NativeCall::instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { ++ // Pull a pointer to the error message out of the instruction ++ // stream. ++ const uint64_t *detail_msg_ptr ++ = (uint64_t*)(pc + NativeInstruction::instruction_size); ++ const char *detail_msg = (const char *)*detail_msg_ptr; ++ const char *msg = "stop"; ++ if (TraceTraps) { ++ tty->print_cr("trap: %s: (SIGILL)", msg); ++ } ++ ++ // End life with a fatal error, message and detail message and the context. ++ // Note: no need to do any post-processing here (e.g. signal chaining) ++ va_list va_dummy; ++ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); ++ va_end(va_dummy); ++ ++ ShouldNotReachHere(); ++ } else if (sig == SIGFPE && ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ stub = ++ SharedRuntime:: ++ continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime:: ++ IMPLICIT_DIVIDE_BY_ZERO); ++ } else if (sig == SIGSEGV && ++ MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++ } ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + -+ // These are hacks to keep us out of trouble. -+ // The problem with these is that they mask other problems -+ if (getFP().lessThanOrEqual(getSP())) { -+ // this attempts to deal with unsigned comparison above -+ return false; ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr_slow != (address)-1) { ++ stub = addr_slow; ++ } + } ++ } + -+ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { -+ // stack frames shouldn't be large. 
-+ return false; ++ if (stub != NULL) { ++ // save all thread context in case we need to restore it ++ if (thread != NULL) { ++ thread->set_saved_exception_pc(pc); + } + ++ os::Posix::ucontext_set_pc(uc, stub); + return true; + } + -+ public Frame sender(RegisterMap regMap, CodeBlob cb) { -+ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; ++ return false; // Mute compiler ++} + -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } ++void os::Linux::init_thread_fpu_state(void) { ++} + -+ // Default is we done have to follow them. The sender_for_xxx will -+ // update it accordingly -+ map.setIncludeArgumentOops(false); ++int os::Linux::get_fpu_control_word(void) { ++ return 0; ++} + -+ if (isEntryFrame()) return senderForEntryFrame(map); -+ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} + -+ if(cb == null) { -+ cb = VM.getVM().getCodeCache().findBlob(getPC()); -+ } else { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); -+ } -+ } ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack + -+ if (cb != null) { -+ return senderForCompiledFrame(map, cb); -+ } ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. -+ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); -+ } ++// return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); ++ return s; ++} + -+ private Frame senderForEntryFrame(RISCV64RegisterMap map) { -+ if (DEBUG) { -+ System.out.println("senderForEntryFrame"); -+ } -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); -+ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); -+ } -+ RISCV64Frame fr; -+ if (jcw.getLastJavaPC() != null) { -+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); -+ } else { -+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); -+ } -+ map.clear(); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); -+ } -+ return fr; -+ } ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler + -+ //------------------------------------------------------------------------------ -+ // frame::adjust_unextended_sp -+ private void adjustUnextendedSP() { -+ // If we are returning to a compiled MethodHandle call site, the -+ // saved_fp will in fact be a saved value of the unextended SP. 
The -+ // simplest way to tell whether we are returning to such a call site -+ // is as follows: ++static const char* reg_abi_names[] = { ++ "pc", ++ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", ++ "x5(t0)", "x6(t1)", "x7(t2)", ++ "x8(s0)", "x9(s1)", ++ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", ++ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", ++ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" ++}; + -+ CodeBlob cb = cb(); -+ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); -+ if (senderNm != null) { -+ // If the sender PC is a deoptimization point, get the original -+ // PC. For MethodHandle call site the unextended_sp is stored in -+ // saved_fp. -+ if (senderNm.isDeoptMhEntry(getPC())) { -+ raw_unextendedSP = getFP(); -+ } -+ else if (senderNm.isDeoptEntry(getPC())) { -+ } -+ else if (senderNm.isMethodHandleReturn(getPC())) { -+ raw_unextendedSP = getFP(); -+ } -+ } ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; + } + -+ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { -+ if (DEBUG) { -+ System.out.println("senderForInterpreterFrame"); -+ } -+ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); -+ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); -+ // We do not need to update the callee-save register mapping because above -+ // us is either another interpreter frame or a converter-frame, but never -+ // directly a compiled frame. -+ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. -+ // However c2 no longer uses callee save register for java calls so there -+ // are no callee register to find. ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ for (int r = 0; r < 32; r++) { ++ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); ++ print_location(st, uc->uc_mcontext.__gregs[r]); ++ } ++ st->cr(); + -+ if (map.getUpdateMap()) -+ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); ++ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); ++ st->cr(); + -+ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); -+ } ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. 
++ address pc = os::Posix::ucontext_get_pc(uc); ++ print_instructions(st, pc, sizeof(char)); ++ st->cr(); ++} + -+ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { -+ map.setLocation(fp, savedFPAddr); ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; + } + -+ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { -+ if (DEBUG) { -+ System.out.println("senderForCompiledFrame"); -+ } -+ -+ // -+ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess -+ // ++ const ucontext_t *uc = (const ucontext_t*)context; + -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); + -+ // frame owned by optimizing compiler -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); -+ } -+ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ // this is horrendously verbose but the layout of the registers in the ++ // context does not match how we defined our abstract Register set, so ++ // we can't just iterate through the gregs area + -+ // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize()); ++ // this is only for the "general purpose" registers + -+ // This is the saved value of FP which may or may not really be an FP. -+ // It is only an FP if the sender is an interpreter frame. -+ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize()); ++ for (int r = 0; r < 32; r++) ++ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); ++ st->cr(); ++} + -+ if (map.getUpdateMap()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++void os::setup_fpu() { ++} + -+ if (cb.getOopMaps() != null) { -+ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); -+ } ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif + -+ // Since the prolog does the save and restore of FP there is no oopmap -+ // for it so we must fill in its location as if there was an oopmap entry -+ // since if our caller was compiled code there could be live jvm state in it. 
-+ updateMapWithSavedLink(map, savedFPAddr); -+ } ++int os::extra_bang_size_in_bytes() { ++ return 0; ++} + -+ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++extern "C" { ++ int SpinPause() { ++ return 0; + } + -+ protected boolean hasSenderPD() { -+ return true; ++ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ if (from > to) { ++ const jshort *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jshort *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } + } -+ -+ public long frameSize() { -+ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ if (from > to) { ++ const jint *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jint *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } + } -+ -+ public Address getLink() { -+ try { -+ if (DEBUG) { -+ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) -+ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); -+ } -+ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); -+ } catch (Exception e) { -+ if (DEBUG) -+ System.out.println("Returning null"); -+ return null; -+ } ++ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ if (from > to) { ++ const jlong *end = from + count; ++ while (from < end) { ++ os::atomic_copy64(from++, to++); ++ } ++ } else if (from < to) { ++ const jlong *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ os::atomic_copy64(from--, to--); ++ } + } ++ } + -+ public Address getUnextendedSP() { return raw_unextendedSP; } ++ void _Copy_arrayof_conjoint_bytes(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count); ++ } ++ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 2); ++ } ++ void _Copy_arrayof_conjoint_jints(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 4); ++ } ++ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 8); ++ } ++}; +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +new file mode 100644 +index 00000000000..6d415630661 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++ ++ static void setup_fpu(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ // Atomically copy 64 bits of data ++ static void atomic_copy64(const volatile void *src, volatile void *dst) { ++ *(jlong *) dst = *(const jlong *) src; ++ } ++ ++ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction ++ // only work on the current hart, so kernel provides the icache flush syscall to flush icache ++ // on each hart. You can pass a flag to determine a global or local icache flush. ++ static void icache_flush(long int start, long int end) ++ { ++ const int SYSCALL_RISCV_FLUSH_ICACHE = 259; ++ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; ++ register long int __a0 asm ("a0") = start; ++ register long int __a1 asm ("a1") = end; ++ // the flush can be applied to either all threads or only the current. ++ // 0 means a global icache flush, and the icache flush will be applied ++ // to other harts concurrently executing. ++ register long int __a2 asm ("a2") = 0; ++ __asm__ volatile ("ecall\n\t" ++ : "+r" (__a0) ++ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) ++ : "memory"); ++ } ++ ++#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +new file mode 100644 +index 00000000000..a6432c84ec7 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
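Aside: the `icache_flush` helper in the hunk above issues the Linux `riscv_flush_icache` system call directly with inline assembly, passing `0` as the flags word so the flush is broadcast to all harts rather than only the calling one (a local `fence.i` would only cover the current hart). A rough equivalent through the generic `syscall(2)` wrapper is sketched below; the syscall number 259 and the argument order are taken from the hunk, and the call is only meaningful on a riscv64 Linux target:

```cpp
#include <unistd.h>       // syscall
#include <cstdint>

// Flush the instruction cache for [start, end) on every hart.
// 259 is the riscv_flush_icache syscall number used in the patch above;
// the last argument is the flags word (0 = flush on all harts).
static void flush_icache_all_harts(void* start, void* end) {
  long rc = syscall(259, (uintptr_t)start, (uintptr_t)end, 0UL);
  (void)rc;   // the VM would treat a failure here as fatal
}

int main() {
  static char code_buf[64];
  flush_icache_all_harts(code_buf, code_buf + sizeof(code_buf));
  return 0;
}
```

Some glibc versions also expose a `__riscv_flush_icache()` wrapper for the same syscall; the raw form above just stays closest to what the hunk does.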
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++ ++#include "runtime/prefetch.hpp" ++ ++ ++inline void Prefetch::read (const void *loc, intx interval) { ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +new file mode 100644 +index 00000000000..3100572e9fd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. 
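Aside: `pd_get_top_frame()` above prefers the thread's own `last_Java_frame` anchor whenever it is walkable and only then falls back to the `sp`/`fp`/`pc` captured in the signal `ucontext`; that fallback, including a C2-specific retry that ignores `fp`, continues in the hunk lines right after this aside. The decision ladder reduced to a self-contained toy (every type and flag below is a stand-in, not a HotSpot class):

```cpp
#include <cstdio>

// Toy stand-ins, just to show the control flow of the real function.
struct ToyFrame { const char* source; bool sane; };

static bool have_walkable_anchor  = false;  // pretend thread state
static bool context_frame_is_sane = false;  // pretend safe_for_sender() result

static bool toy_get_top_frame(ToyFrame* out, bool in_java, bool compiler2) {
  if (have_walkable_anchor) {               // 1) prefer the frame anchor
    *out = { "last_Java_frame anchor", true };
    return true;
  }
  if (!in_java) {                           // 2) not in Java: nothing else to try
    return false;
  }
  ToyFrame guess = { "ucontext sp/fp/pc", context_frame_is_sane };
  if (!guess.sane && compiler2) {           // 3) C2 frames may have no valid fp
    guess = { "ucontext sp/pc only", true };
  }
  if (!guess.sane) {
    return false;                           // 4) give up
  }
  *out = guess;
  return true;
}

int main() {
  ToyFrame f;
  if (toy_get_top_frame(&f, /*in_java=*/true, /*compiler2=*/true)) {
    printf("top frame from: %s\n", f.source);
  }
  return 0;
}
```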
++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp = NULL; ++ intptr_t* ret_sp = NULL; ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(this)) { ++#ifdef COMPILER2 ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(this)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +new file mode 100644 +index 00000000000..61e2cf85b63 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++ ++ private: ++ void pd_initialize() { ++ _anchor.clear(); ++ } ++ ++ frame pd_last_frame(); ++ ++ public: ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++ ++#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +new file mode 100644 +index 00000000000..6cf7683a586 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
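Aside: `last_Java_fp_offset()` in the header above adds the offset of `_anchor` inside `JavaThread` to the offset of the saved fp inside `JavaFrameAnchor`, so one displacement from the thread pointer reaches the nested field. The same composition with `offsetof`, over hypothetical stand-in structs rather than the real layouts:

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for the JavaThread / JavaFrameAnchor field layout.
struct Anchor  { void* last_sp; void* last_fp; void* last_pc; };
struct ThreadT { long  other_fields[4]; Anchor anchor; };

int main() {
  // Composite offset: thread base -> anchor -> last_fp, usable as a single
  // displacement (e.g. a load of the form "ld t0, OFF(thread)").
  size_t off = offsetof(ThreadT, anchor) + offsetof(Anchor, last_fp);
  printf("last_fp lives at [thread + %zu]\n", off);

  ThreadT t{};
  void** fp_slot = (void**)((char*)&t + off);
  *fp_slot = (void*)0x1234;                  // writes the same field...
  printf("%p\n", t.anchor.last_fp);          // ...we read back here
  return 0;
}
```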
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(OSThread::thread_id_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +new file mode 100644 +index 00000000000..4623dbfad42 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -0,0 +1,118 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
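Aside: the `VM_STRUCTS_OS_CPU` / `VM_TYPES_OS_CPU` macros above declare nothing by themselves; they are X-macro style lists that `vmStructs.cpp` expands with different callback macros, so one expansion yields the name/offset table the Serviceability Agent reads and another yields compile-time checks. A minimal sketch of that pattern with made-up fields (not the real vmStructs machinery):

```cpp
#include <cstddef>
#include <cstdio>

// A made-up "exported fields" list in the same X-macro style as
// VM_STRUCTS_OS_CPU: the list stays fixed, the callback macro varies.
struct OSThreadDemo { int _thread_id; unsigned long _pthread_id; };

#define DEMO_STRUCTS(nonstatic_field)                         \
  nonstatic_field(OSThreadDemo, _thread_id,  int)             \
  nonstatic_field(OSThreadDemo, _pthread_id, unsigned long)

// Expansion 1: a name/offset table, which is what the SA ultimately consumes.
#define PRINT_FIELD(klass, field, type) \
  printf(#klass "::" #field " @ offset %zu\n", offsetof(klass, field));

// Expansion 2: compile-time sanity checks on the declared field types.
#define CHECK_FIELD(klass, field, type) \
  static_assert(sizeof(((klass*)nullptr)->field) == sizeof(type), #field);

DEMO_STRUCTS(CHECK_FIELD)

int main() {
  DEMO_STRUCTS(PRINT_FIELD)
  return 0;
}
```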
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_ISA_I ++#define HWCAP_ISA_I (1 << ('I' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_M ++#define HWCAP_ISA_M (1 << ('M' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_A ++#define HWCAP_ISA_A (1 << ('A' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_F ++#define HWCAP_ISA_F (1 << ('F' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_D ++#define HWCAP_ISA_D (1 << ('D' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_C ++#define HWCAP_ISA_C (1 << ('C' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_V ++#define HWCAP_ISA_V (1 << ('V' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_B ++#define HWCAP_ISA_B (1 << ('B' - 'A')) ++#endif ++ ++#define read_csr(csr) \ ++({ \ ++ register unsigned long __v; \ ++ __asm__ __volatile__ ("csrr %0, %1" \ ++ : "=r" (__v) \ ++ : "i" (csr) \ ++ : "memory"); \ ++ __v; \ ++}) ++ ++uint32_t VM_Version::get_current_vector_length() { ++ assert(_features & CPU_V, "should not call this"); ++ return (uint32_t)read_csr(CSR_VLENB); ++} ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); ++ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); ++ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); ++ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); ++ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); ++ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); ++ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); ++ static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V | ++ HWCAP_ISA_B); ++ ++ if (FILE *f = fopen("/proc/cpuinfo", "r")) { ++ char buf[512], *p; ++ while (fgets(buf, sizeof (buf), f) != NULL) { ++ if ((p = strchr(buf, ':')) != NULL) { ++ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ char* uarch = os::strdup(p + 2); ++ uarch[strcspn(uarch, "\n")] = '\0'; ++ _uarch = uarch; ++ break; ++ } ++ } ++ } ++ fclose(f); ++ } ++} +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index e30d39f73d1..733ee9e654c 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
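Aside: `get_os_cpu_info()` above relies on the kernel encoding single-letter RISC-V extensions in `AT_HWCAP` as `1 << (letter - 'A')`, which is why the VM's `CPU_*` flags can be statically asserted to match `HWCAP_ISA_*` and masked in directly. A standalone probe in the same spirit (Linux-only; the letter set is just the one checked in the hunk):

```cpp
#include <sys/auxv.h>   // getauxval, AT_HWCAP
#include <cstdio>

int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);
  // Single-letter ISA extensions are reported as bit (letter - 'A').
  const char letters[] = { 'I', 'M', 'A', 'F', 'D', 'C', 'V', 'B' };
  printf("rv64");
  for (char c : letters) {
    if (hwcap & (1UL << (c - 'A'))) {
      printf("%c", c + ('a' - 'A'));   // lower-case, e.g. prints rv64imafdc
    }
  }
  printf("\n");
  return 0;
}
```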
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { + void LIR_Op2::verify() const { + #ifdef ASSERT + switch (code()) { +- case lir_cmove: + case lir_xchg: + break; + +@@ -252,9 +251,7 @@ void LIR_Op2::verify() const { + + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) + , _label(block->label()) + , _block(block) + , _ublock(NULL) +@@ -262,9 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : +- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) + , _label(stub->entry()) + , _block(NULL) + , _ublock(NULL) +@@ -272,9 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) +- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) + , _label(block->label()) + , _block(block) + , _ublock(ublock) +@@ -296,13 +289,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { + } + + void LIR_OpBranch::negate_cond() { +- switch (_cond) { +- case lir_cond_equal: _cond = lir_cond_notEqual; break; +- case lir_cond_notEqual: _cond = lir_cond_equal; break; +- case lir_cond_less: _cond = lir_cond_greaterEqual; break; +- case lir_cond_lessEqual: _cond = lir_cond_greater; break; +- case lir_cond_greaterEqual: _cond = lir_cond_less; break; +- case lir_cond_greater: _cond = lir_cond_lessEqual; break; ++ switch (cond()) { ++ case lir_cond_equal: set_cond(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_cond(lir_cond_equal); break; ++ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_cond(lir_cond_less); break; ++ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; + default: ShouldNotReachHere(); + } + } +@@ -525,6 +518,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(op->as_OpBranch() != NULL, "must be"); + LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + ++ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && ++ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && ++ opBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); ++ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++ + if (opBranch->_info != NULL) do_info(opBranch->_info); + assert(opBranch->_result->is_illegal(), "not used"); + if (opBranch->_stub != NULL) opBranch->stub()->visit(this); +@@ -615,17 +615,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + // to the result operand, otherwise the backend fails + case lir_cmove: + { +- assert(op->as_Op2() != NULL, "must be"); +- LIR_Op2* op2 = (LIR_Op2*)op; ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; + +- assert(op2->_info == NULL && 
op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && +- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); +- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); + +- do_input(op2->_opr1); +- do_input(op2->_opr2); +- do_temp(op2->_opr2); +- do_output(op2->_result); ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ if (op4->_opr3->is_valid()) do_input(op4->_opr3); ++ if (op4->_opr4->is_valid()) do_input(op4->_opr4); ++ do_temp(op4->_opr2); ++ do_output(op4->_result); + + break; + } +@@ -1048,6 +1050,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1084,6 +1090,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) + , _file(NULL) + , _line(0) + #endif ++#ifdef RISCV ++ , _cmp_opr1(LIR_OprFact::illegalOpr) ++ , _cmp_opr2(LIR_OprFact::illegalOpr) ++#endif + { } + + +@@ -1101,6 +1111,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { + } + #endif + ++#ifdef RISCV ++void LIR_List::set_cmp_oprs(LIR_Op* op) { ++ switch (op->code()) { ++ case lir_cmp: ++ _cmp_opr1 = op->as_Op2()->in_opr1(); ++ _cmp_opr2 = op->as_Op2()->in_opr2(); ++ break; ++ case lir_branch: // fall through ++ case lir_cond_float_branch: ++ assert(op->as_OpBranch()->cond() == lir_cond_always || ++ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), ++ "conditional branches must have legal operands"); ++ if (op->as_OpBranch()->cond() != lir_cond_always) { ++ op->as_Op2()->set_in_opr1(_cmp_opr1); ++ op->as_Op2()->set_in_opr2(_cmp_opr2); ++ } ++ break; ++ case lir_cmove: ++ op->as_Op4()->set_in_opr3(_cmp_opr1); ++ op->as_Op4()->set_in_opr4(_cmp_opr2); ++ break; ++#if INCLUDE_ZGC ++ case lir_zloadbarrier_test: ++ _cmp_opr1 = FrameMap::as_opr(t1); ++ _cmp_opr2 = LIR_OprFact::intConst(0); ++ break; ++#endif ++ default: ++ break; ++ } ++} ++#endif + + void LIR_List::append(LIR_InsertionBuffer* buffer) { + assert(this == buffer->lir_list(), "wrong lir list"); +@@ -1680,7 +1722,6 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; +- case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; + case lir_mul: s = "mul"; break; +@@ -1705,6 +1746,8 @@ const char * LIR_Op::name() const { + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; + case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmove: s = "cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1841,6 +1884,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { + // LIR_OpBranch + void LIR_OpBranch::print_instr(outputStream* out) const { + print_condition(out, cond()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if 
(stub() != NULL) { +@@ -1927,7 +1972,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +- if (code() == lir_cmove || code() == lir_cmp) { ++ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); +@@ -1978,6 +2023,15 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} + + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 3234ca018b7..efff6bf7a30 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -867,6 +867,7 @@ class LIR_Op2; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -916,8 +917,6 @@ enum LIR_Code { + , lir_null_check + , lir_return + , lir_leal +- , lir_branch +- , lir_cond_float_branch + , lir_move + , lir_convert + , lir_alloc_object +@@ -929,11 +928,12 @@ enum LIR_Code { + , lir_unwind + , end_op1 + , begin_op2 ++ , lir_branch ++ , lir_cond_float_branch + , lir_cmp + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i +- , lir_cmove + , lir_add + , lir_sub + , lir_mul +@@ -964,6 +964,9 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1001,6 +1004,11 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert ++#ifdef INCLUDE_ZGC ++ , begin_opZLoadBarrierTest ++ , lir_zloadbarrier_test ++ , end_opZLoadBarrierTest ++#endif + }; + + +@@ -1134,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1410,51 +1419,6 @@ class LIR_OpRTCall: public LIR_OpCall { + virtual void verify() const; + }; + +- +-class LIR_OpBranch: public LIR_Op { +- friend class LIR_OpVisitState; +- +- private: +- LIR_Condition _cond; +- BasicType _type; +- Label* _label; +- BlockBegin* _block; // if this is a branch to a block, this is the block +- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block +- CodeStub* _stub; // if this is a branch to a stub, this is the stub +- +- public: +- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) +- , 
_cond(cond) +- , _type(type) +- , _label(lbl) +- , _block(NULL) +- , _ublock(NULL) +- , _stub(NULL) { } +- +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); +- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); +- +- // for unordered comparisons +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); +- +- LIR_Condition cond() const { return _cond; } +- BasicType type() const { return _type; } +- Label* label() const { return _label; } +- BlockBegin* block() const { return _block; } +- BlockBegin* ublock() const { return _ublock; } +- CodeStub* stub() const { return _stub; } +- +- void change_block(BlockBegin* b); +- void change_ublock(BlockBegin* b); +- void negate_cond(); +- +- virtual void emit_code(LIR_Assembler* masm); +- virtual LIR_OpBranch* as_OpBranch() { return this; } +- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +-}; +- +- + class ConversionStub; + + class LIR_OpConvert: public LIR_Op1 { +@@ -1614,19 +1578,19 @@ class LIR_Op2: public LIR_Op { + void verify() const; + + public: +- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) ++ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) + : LIR_Op(code, LIR_OprFact::illegalOpr, info) + , _opr1(opr1) + , _opr2(opr2) +- , _type(T_ILLEGAL) +- , _condition(condition) + , _fpu_stack_size(0) ++ , _type(type) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1651,14 +1615,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(type) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1667,14 +1631,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(T_ILLEGAL) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(tmp1) + , _tmp2(tmp2) + , _tmp3(tmp3) + , _tmp4(tmp4) +- , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(tmp5) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1650,10 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition 
condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1667,51 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); ++ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); ++ ++ // for unordered comparisons ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); ++ ++ LIR_Condition cond() const { ++ return condition(); ++ } ++ ++ void set_cond(LIR_Condition cond) { ++ set_condition(cond); ++ } ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpBranch* as_OpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1766,6 +1775,63 @@ class LIR_Op3: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ protected: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Opr _tmp1; ++ LIR_Opr _tmp2; ++ LIR_Opr _tmp3; ++ LIR_Opr _tmp4; ++ LIR_Opr _tmp5; ++ LIR_Condition _condition; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, ++ LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _tmp1(LIR_OprFact::illegalOpr) ++ , _tmp2(LIR_OprFact::illegalOpr) ++ , _tmp3(LIR_OprFact::illegalOpr) ++ , _tmp4(LIR_OprFact::illegalOpr) ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmove, "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { 
return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Opr tmp1_opr() const { return _tmp1; } ++ LIR_Opr tmp2_opr() const { return _tmp2; } ++ LIR_Opr tmp3_opr() const { return _tmp3; } ++ LIR_Opr tmp4_opr() const { return _tmp4; } ++ LIR_Opr tmp5_opr() const { return _tmp5; } ++ ++ LIR_Condition condition() const { return _condition; } ++ void set_condition(LIR_Condition condition) { _condition = condition; } ++ ++ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } ++ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } ++ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } ++ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; } ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; + + //-------------------------------- + class LabelObj: public CompilationResourceObj { +@@ -1988,6 +2054,10 @@ class LIR_List: public CompilationResourceObj { + const char * _file; + int _line; + #endif ++#ifdef RISCV ++ LIR_Opr _cmp_opr1; ++ LIR_Opr _cmp_opr2; ++#endif + + public: + void append(LIR_Op* op) { +@@ -2000,6 +2070,12 @@ class LIR_List: public CompilationResourceObj { + } + #endif // PRODUCT + ++#ifdef RISCV ++ set_cmp_oprs(op); ++ // lir_cmp set cmp oprs only on riscv ++ if (op->code() == lir_cmp) return; ++#endif ++ + _operations.append(op); + + #ifdef ASSERT +@@ -2016,6 +2092,10 @@ class LIR_List: public CompilationResourceObj { + void set_file_and_line(const char * file, int line); + #endif + ++#ifdef RISCV ++ void set_cmp_oprs(LIR_Op* op); ++#endif ++ + //---------- accessors --------------- + LIR_OpList* instructions_list() { return &_operations; } + int length() const { return _operations.length(); } +@@ -2149,8 +2229,9 @@ class LIR_List: public CompilationResourceObj { + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { +- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { ++ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); + } + + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 160483d5f74..42a0350f7d9 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); + break; + +- case lir_cmove: +- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); +- break; +- + case lir_shl: + case lir_shr: + case lir_ushr: +@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + } + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch(op->code()) { ++ case lir_cmove: ++ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} + + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); +diff --git 
a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index 44a5bcbe542..c677bd346fc 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); +@@ -222,8 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); +- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); + void vtable_call( LIR_OpJavaCall* op); +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index c28055fd996..a4dfe8552ae 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1242,11 +1242,11 @@ void LinearScan::add_register_hints(LIR_Op* op) { + break; + } + case lir_cmove: { +- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); +- LIR_Op2* cmove = (LIR_Op2*)op; ++ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; + + LIR_Opr move_from = cmove->in_opr1(); +- LIR_Opr move_to = cmove->result_opr(); ++ LIR_Opr move_to = cmove->result_opr(); + + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); +@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { + } + } + ++#ifndef RISCV ++ // Disable these optimizations on riscv temporarily, because it does not ++ // work when the comparison operands are bound to branches or cmoves. + { TIME_LINEAR_SCAN(timer_optimize_lir); + + EdgeMoveOptimizer::optimize(ir()->code()); +@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { + // check that cfg is still correct after optimizations + ir()->verify(); + } ++#endif + + NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); + NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); +@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. 
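Aside: the c1_LIR changes above all serve one point: RISC-V has no condition-flags register, so a `lir_cmp` cannot leave its result "in the flags" for a later branch or conditional move. Instead, `LIR_List::set_cmp_oprs()` remembers the operands of the most recent compare and attaches them to the branch (now a `LIR_Op2`) or cmove (now a `LIR_Op4` with `opr3`/`opr4`), while the `lir_cmp` itself is dropped from the RISC-V instruction stream; that binding is also why the LinearScan hunk above disables `EdgeMoveOptimizer` on RISC-V for now, since moving code across block boundaries could separate a branch from the operands it now carries. A schematic contrast of the two lowering styles (toy code, not HotSpot classes):

```cpp
#include <cstdio>

// Toy model of two lowerings for "x = (a < b) ? y : x;".
// Flag-based ISAs: cmp writes hidden flags, cmove reads them later.
// RISC-V style: the conditional operation carries both compare operands
// itself, which is what set_cmp_oprs() arranges in the hunks above.
struct FlagCPU {
  bool lt = false;
  void cmp(int a, int b)            { lt = (a < b); }       // writes "flags"
  int  cmove_lt(int x, int y) const { return lt ? y : x; }  // reads "flags"
};

struct RiscvStyle {
  // No hidden state: the cmove is handed the operands of the comparison it
  // depends on (mirroring LIR_Op4's opr3/opr4).
  static int cmove_lt(int a, int b, int x, int y) { return (a < b) ? y : x; }
};

int main() {
  FlagCPU f;
  f.cmp(1, 2);
  printf("flags-based : %d\n", f.cmove_lt(10, 20));                // 20
  printf("riscv-style : %d\n", RiscvStyle::cmove_lt(1, 2, 10, 20)); // 20
  return 0;
}
```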
+- LIR_Op2* prev_cmove = NULL; ++ LIR_Op4* prev_cmove = NULL; + + for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { + prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { +- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); +- prev_cmove = (LIR_Op2*)prev_op; ++ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); ++ prev_cmove = (LIR_Op4*)prev_op; + assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); + } + if (prev_op->code() == lir_cmp) { +diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +index 4771a8b8652..6d377fa005d 100644 +--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ++++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -31,7 +31,7 @@ + #include "utilities/defaultStream.hpp" + + void ShenandoahArguments::initialize() { +-#if !(defined AARCH64 || defined AMD64 || defined IA32) ++#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) + vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); + #endif + +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce742433..f36dd612eff 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -100,7 +100,7 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : +- LIR_Op(), ++ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), + _opr(opr) {} + + virtual void visit(LIR_OpVisitState* state) { +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a57e..ff16de0e778 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp +index c64d0879592..bc856d4b617 100644 +--- a/src/hotspot/share/opto/regmask.hpp ++++ b/src/hotspot/share/opto/regmask.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
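Aside: the `jfrBigEndian.hpp` change above makes `platform_supports_unaligned_reads()` report `false` for RISCV, so JFR reads multi-byte values bytewise instead of dereferencing a possibly misaligned pointer. The portable way to express such a read in ordinary C++ is a `memcpy` into an aligned local, for example:

```cpp
#include <cstring>
#include <cstdint>
#include <cstdio>

// Read a 32-bit value from an address of unknown alignment without issuing
// a misaligned load (which can trap or be very slow on some RISC-V cores).
static uint32_t read_u32_unaligned(const unsigned char* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));   // compilers lower this to suitable loads
  return v;
}

int main() {
  unsigned char buf[8] = {0, 0x78, 0x56, 0x34, 0x12, 0, 0, 0};
  printf("0x%08x\n", read_u32_unaligned(buf + 1));  // 0x12345678 (little-endian host)
  return 0;
}
```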
+ * + * This code is free software; you can redistribute it and/or modify it +diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp +index c46247f2bdb..b5e64b65ff7 100644 +--- a/src/hotspot/share/runtime/abstract_vm_version.cpp ++++ b/src/hotspot/share/runtime/abstract_vm_version.cpp +@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { + IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ + S390_ONLY("s390") \ +- SPARC_ONLY("sparc") ++ SPARC_ONLY("sparc") \ ++ RISCV64_ONLY("riscv64") + #endif // !ZERO + #endif // !CPU + +diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp +index e7b32723e47..434826853ee 100644 +--- a/src/hotspot/share/runtime/synchronizer.cpp ++++ b/src/hotspot/share/runtime/synchronizer.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp +index aa914eccafc..a2f98e6a251 100644 +--- a/src/hotspot/share/runtime/thread.hpp ++++ b/src/hotspot/share/runtime/thread.hpp +@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { + address last_Java_pc(void) { return _anchor.last_Java_pc(); } + + // Safepoint support +-#if !(defined(PPC64) || defined(AARCH64)) ++#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) + JavaThreadState thread_state() const { return _thread_state; } + void set_thread_state(JavaThreadState s) { + assert(current_or_null() == NULL || current_or_null() == this, +diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp +index dee8534f739..9af07aeb459 100644 +--- a/src/hotspot/share/runtime/thread.inline.hpp ++++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
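Aside: excluding RISCV64 from the plain `thread_state()` accessors above routes the port through the PPC64/AArch64 variant, where the field is read with `OrderAccess::load_acquire` (as the `thread.inline.hpp` hunk that continues below shows) and paired with a release store on the write side, because these are weakly ordered CPUs. The same publish/observe pattern in portable C++:

```cpp
#include <atomic>
#include <cstdio>
#include <thread>

// On weakly ordered CPUs (RISC-V included), publishing a state change that
// another thread polls needs explicit acquire/release ordering.
static std::atomic<int> thread_state{0};
static int guarded_data = 0;

int main() {
  std::thread t([] {
    guarded_data = 42;                                        // ordinary store
    thread_state.store(1, std::memory_order_release);         // publish
  });
  while (thread_state.load(std::memory_order_acquire) != 1) {} // observe
  printf("%d\n", guarded_data);   // guaranteed to print 42
  t.join();
  return 0;
}
```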
+ * +@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { + set_has_async_exception(); + } + +-#if defined(PPC64) || defined (AARCH64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) + inline JavaThreadState JavaThread::thread_state() const { + return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); + } +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index cf802538689..e8ab3097ac7 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -597,6 +597,32 @@ + + #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) + ++#if defined(RISCV32) || defined(RISCV64) ++#define RISCV ++#define RISCV_ONLY(code) code ++#define NOT_RISCV(code) ++#else ++#undef RISCV ++#define RISCV_ONLY(code) ++#define NOT_RISCV(code) code ++#endif ++ ++#ifdef RISCV32 ++#define RISCV32_ONLY(code) code ++#define NOT_RISCV32(code) ++#else ++#define RISCV32_ONLY(code) ++#define NOT_RISCV32(code) code ++#endif ++ ++#ifdef RISCV64 ++#define RISCV64_ONLY(code) code ++#define NOT_RISCV64(code) ++#else ++#define RISCV64_ONLY(code) ++#define NOT_RISCV64(code) code ++#endif ++ + #ifdef VM_LITTLE_ENDIAN + #define LITTLE_ENDIAN_ONLY(code) code + #define BIG_ENDIAN_ONLY(code) +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c57..45a927fb5ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -58,6 +58,10 @@ + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef riscv64 ++#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef riscv64 ++#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif +@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(riscv64) ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg ++ ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; ++ ++#endif /* riscv64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e0213..ab092d4ee33 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
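Aside: the `REG_INDEX` block above is simply the standard RISC-V integer-register ABI laid out by index: x0..x31 map to zero/ra/sp/gp/tp, t0-t6, s0-s11 and a0-a7, and the SA stores them in that numeric order. A small lookup table for reference, matching the order the hunk fills `regs[]`:

```cpp
#include <cstdio>

// RISC-V integer registers x0..x31 and their ABI mnemonics, in the same
// numeric order used by the SA register array above.
static const char* const abi_names[32] = {
  "zero", "ra", "sp",  "gp",  "tp", "t0", "t1", "t2",
  "s0/fp","s1", "a0",  "a1",  "a2", "a3", "a4", "a5",
  "a6",   "a7", "s2",  "s3",  "s4", "s5", "s6", "s7",
  "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6"
};

int main() {
  for (int i = 0; i < 32; i++) {
    printf("x%-2d = %s\n", i, abi_names[i]);
  }
  return 0;
}
```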
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -43,6 +43,8 @@ + #elif defined(arm) + #include + #define user_regs_struct pt_regs ++#elif defined(riscv64) ++#include + #endif + + // This C bool type must be int for compatibility with Linux calls and +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c73..9bff9ee9b15 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -36,6 +36,7 @@ + import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; + import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; ++import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +@@ -598,6 +599,8 @@ private void setupDebuggerLinux() { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("riscv64")) { ++ machDesc = new MachineDescriptionRISCV64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +new file mode 100644 +index 00000000000..a972516dee3 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
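Aside: `HotSpotAgent` above now maps the `"riscv64"` cpu string to a new `MachineDescriptionRISCV64`, whose body (just below) states the three facts the SA needs about the target: 8-byte addresses, LP64, and little-endian byte order. A quick native probe of those properties, for reference only:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Linux RISC-V is little-endian with 8-byte pointers, which is what
  // getAddressSize() == 8 and isBigEndian() == false below encode.
  uint16_t probe = 0x0102;
  bool little = (*reinterpret_cast<unsigned char*>(&probe) == 0x02);
  printf("host is %s-endian, pointer size %zu bytes\n",
         little ? "little" : "big", sizeof(void*));
  return 0;
}
```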
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb7141..dc0bcb3da94 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * +@@ -34,12 +34,14 @@ + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.riscv64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.riscv64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); +- } else { ++ } else if (cpu.equals("riscv64")) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxRISCV64CFrame(dbg, fp, pc); ++ } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); + return context.getTopFrame(dbg); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +new file mode 100644 +index 00000000000..f06da24bd0e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++ ++public final class LinuxRISCV64CFrame extends BasicCFrame { ++ private static final int C_FRAME_LINK_OFFSET = -2; ++ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; ++ ++ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(rsp)) { ++ return null; ++ } ++ ++ // Check alignment of fp ++ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); ++ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ // package/class internals only ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address sp; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +new file mode 100644 +index 00000000000..fdb841ccf3d +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +new file mode 100644 +index 00000000000..96d5dee47ce +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcRISCV64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. 
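LinuxRISCV64ThreadContext above (and the Proc/Remote variants that follow) only supply the debugger-specific long-to-Address conversion; the register numbering itself lives in the shared RISCV64ThreadContext defined later in this patch. A minimal reader's sketch, not part of the patch, of how such a context is typically consumed, using only the SP/FP/PC indices and accessors that this patch introduces:

    import sun.jvm.hotspot.debugger.Address;
    import sun.jvm.hotspot.debugger.ThreadProxy;
    import sun.jvm.hotspot.debugger.riscv64.RISCV64ThreadContext;

    class RISCV64ContextSketch {
        // Fetch the three registers the SA needs to start a stack walk.
        static Address[] frameRegisters(ThreadProxy thread) {
            RISCV64ThreadContext ctx = (RISCV64ThreadContext) thread.getContext();
            Address sp = ctx.getRegisterAsAddress(RISCV64ThreadContext.SP);  // x2
            Address fp = ctx.getRegisterAsAddress(RISCV64ThreadContext.FP);  // x8 (s0)
            Address pc = ctx.getRegisterAsAddress(RISCV64ThreadContext.PC);
            return new Address[] { sp, fp, pc };
        }
    }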
++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcRISCV64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { ++ return false; ++ } ++ ++ return (((ProcRISCV64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +new file mode 100644 +index 00000000000..f2aa845e665 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +new file mode 100644 +index 00000000000..19f64b8ce2d +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcRISCV64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +new file mode 100644 +index 00000000000..aecbda59023 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteRISCV64Thread extends RemoteThread { ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +new file mode 100644 +index 00000000000..1d3da6be5af +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +new file mode 100644 +index 00000000000..725b94e25a3 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java +new file mode 100644 +index 00000000000..fb60a70427a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.riscv64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on riscv64 platforms; only a sub-portion ++ * of the context is guaranteed to be present on all operating ++ * systems. */ ++ ++public abstract class RISCV64ThreadContext implements ThreadContext { ++ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. ++ ++ // /* ++ // * Signal context structure - contains all info to do with the state ++ // * before the signal handler was invoked. 
++ // */ ++ // struct sigcontext { ++ // struct user_regs_struct sc_regs; ++ // union __riscv_fp_state sc_fpregs; ++ // }; ++ // ++ // struct user_regs_struct { ++ // unsigned long pc; ++ // unsigned long ra; ++ // unsigned long sp; ++ // unsigned long gp; ++ // unsigned long tp; ++ // unsigned long t0; ++ // unsigned long t1; ++ // unsigned long t2; ++ // unsigned long s0; ++ // unsigned long s1; ++ // unsigned long a0; ++ // unsigned long a1; ++ // unsigned long a2; ++ // unsigned long a3; ++ // unsigned long a4; ++ // unsigned long a5; ++ // unsigned long a6; ++ // unsigned long a7; ++ // unsigned long s2; ++ // unsigned long s3; ++ // unsigned long s4; ++ // unsigned long s5; ++ // unsigned long s6; ++ // unsigned long s7; ++ // unsigned long s8; ++ // unsigned long s9; ++ // unsigned long s10; ++ // unsigned long s11; ++ // unsigned long t3; ++ // unsigned long t4; ++ // unsigned long t5; ++ // unsigned long t6; ++ // }; ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work) ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int R0 = 0; ++ public static final int R1 = 1; ++ public static final int R2 = 2; ++ public static final int R3 = 3; ++ public static final int R4 = 4; ++ public static final int R5 = 5; ++ public static final int R6 = 6; ++ public static final int R7 = 7; ++ public static final int R8 = 8; ++ public static final int R9 = 9; ++ public static final int R10 = 10; ++ public static final int R11 = 11; ++ public static final int R12 = 12; ++ public static final int R13 = 13; ++ public static final int R14 = 14; ++ public static final int R15 = 15; ++ public static final int R16 = 16; ++ public static final int R17 = 17; ++ public static final int R18 = 18; ++ public static final int R19 = 19; ++ public static final int R20 = 20; ++ public static final int R21 = 21; ++ public static final int R22 = 22; ++ public static final int R23 = 23; ++ public static final int R24 = 24; ++ public static final int R25 = 25; ++ public static final int R26 = 26; ++ public static final int R27 = 27; ++ public static final int R28 = 28; ++ public static final int R29 = 29; ++ public static final int R30 = 30; ++ public static final int R31 = 31; ++ ++ public static final int NPRGREG = 32; ++ ++ public static final int PC = R0; ++ public static final int LR = R1; ++ public static final int SP = R2; ++ public static final int FP = R8; ++ ++ private long[] data; ++ ++ public RISCV64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ switch (index) { ++ case LR: return "lr"; ++ case SP: return "sp"; ++ case PC: return "pc"; ++ default: ++ return "r" + index; ++ } ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ * 
tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785a7..89d676fe3b9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -38,6 +38,7 @@ + import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; +@@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("riscv64")) { ++ access = new LinuxRISCV64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +new file mode 100644 +index 00000000000..f2e224f28ee +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -0,0 +1,134 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_riscv64; ++ ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new RISCV64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new RISCV64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
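Once this PD-access class is registered by the Threads.java hunk above, RISC-V stack walking goes through the ordinary architecture-independent SA entry points. A hedged sketch, not part of the patch, assuming the standard JavaThread.getCurrentFrameGuess() wrapper (not shown here) and otherwise relying only on calls already used in these files (newRegisterMap, Frame.sender):

    import sun.jvm.hotspot.runtime.Frame;
    import sun.jvm.hotspot.runtime.JavaThread;
    import sun.jvm.hotspot.runtime.RegisterMap;

    class RISCV64StackWalkSketch {
        // Print every physical frame of a Java thread, starting from the guessed
        // top frame and following RISCV64Frame.sender() towards the entry frame.
        static void dumpRawFrames(JavaThread thread) {
            Frame frame = thread.getCurrentFrameGuess();    // delegates to the PD access above
            RegisterMap map = thread.newRegisterMap(false); // no oop-map updates needed
            while (frame != null) {
                System.out.println(frame);                  // RISCV64Frame.toString(): sp/fp/pc
                frame = frame.sender(map);
            }
        }
    }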
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +new file mode 100644 +index 00000000000..34701c6922f +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +@@ -0,0 +1,223 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++ ++/**
++ Should be able to be used on all riscv64 platforms we support
++ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()"
++ functionality. Input is an RISCV64ThreadContext; output is SP, FP,
++ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is
++ left to the caller, since we may need to subclass RISCV64Frame to
++ support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated FP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
*/ ++ ++public class RISCV64CurrentFrameGuess { ++ private RISCV64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") ++ != null; ++ ++ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable FP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from SP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, FP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new RISCV64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
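++ // Reaching the bottommost entry frame without an exception means every
++ // intermediate sender() call produced a self-consistent frame, so this
++ // candidate sp is accepted (fp is deliberately left null in setValues below).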
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved SP and ++ // FP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. ++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct RISCV64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +new file mode 100644 +index 00000000000..df280005d72 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -0,0 +1,556 @@ ++/* ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the riscv64 family of CPUs. */ ++ ++public class RISCV64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int LINK_OFFSET = -2; ++ private static final int RETURN_ADDR_OFFSET = -1; ++ private static final int SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_PADDING_OFFSET; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg fp = new VMReg(8); ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ 
INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private RISCV64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ ++ // We cannot assume SP[-1] always contains a valid return PC (e.g. if ++ // the callee is a C/C++ compiled frame). If the PC is not known to ++ // Java then this.pc is null. ++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; ++ } ++ ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ RISCV64Frame frame = new RISCV64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof RISCV64Frame)) { ++ return false; ++ } ++ ++ RISCV64Frame other = (RISCV64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? 
"null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ RISCV64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // If we are returning to a compiled MethodHandle call site, the ++ // saved_fp will in fact be a saved value of the unextended SP. The ++ // simplest way to tell whether we are returning to such a call site ++ // is as follows: ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original ++ // PC. For MethodHandle call site the unextended_sp is stored in ++ // saved_fp. ++ if (senderNm.isDeoptMhEntry(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ else if (senderNm.isDeoptEntry(getPC())) { ++ } ++ else if (senderNm.isMethodHandleReturn(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. 
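A reader's aid for the slot arithmetic in this method and the compiled/native cases nearby (not patch content): getLink(), getSenderPC() and getSenderSP() all resolve to fixed slots relative to the current fp, assuming the usual SA convention that addressOfStackSlot(i) is fp plus i times the address size. A minimal sketch on an LP64 target:

    import sun.jvm.hotspot.debugger.Address;

    class RISCV64SenderSlotSketch {
        // Mirrors LINK_OFFSET = -2, RETURN_ADDR_OFFSET = -1, SENDER_SP_OFFSET = 0
        // declared at the top of RISCV64Frame.
        static Address[] senderFields(Address fp) {
            final long wordSize = 8;                            // LP64 address size
            Address senderFP = fp.getAddressAt(-2 * wordSize);  // saved caller fp ("link")
            Address senderPC = fp.getAddressAt(-1 * wordSize);  // saved return address
            Address senderSP = fp;                              // slot 0: caller sp equals callee fp
            return new Address[] { senderSP, senderFP, senderPC };
        }
    }

For the interpreter-specific slots used just above, the chained initializers earlier in the file resolve (derived by hand, so worth double-checking) to: sender sp -3, last sp -4, method -5, mdx -6, padding -7, mirror -8, cache -9, locals -10, bcx -11, and initial sp -12, which is also where the monitor block top and bottom begin.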
++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
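++ // Both values below come from fp-relative interpreter slots: the saved bcp
++ // from the bcx slot and the Method* from the method slot; the shared
++ // bcpToBci() helper then maps that bcp back to a bytecode index.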
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +new file mode 100644 +index 00000000000..d0ad2b559a6 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class RISCV64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public RISCV64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +new file mode 100644 +index 00000000000..4aeb1c6f557 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class RISCV64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected RISCV64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107cab..6552ce255fc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,7 +54,7 @@ public static String getOS() throws UnsupportedPlatformException { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java +index 7805918c28a..823b9f39dbf 100644 +--- a/test/hotspot/jtreg/compiler/c2/TestBit.java ++++ b/test/hotspot/jtreg/compiler/c2/TestBit.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -34,7 +34,7 @@ + * + * @run driver compiler.c2.TestBit + * +- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" ++ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" + * @requires vm.debug == true & vm.compiler2.enabled + */ + public class TestBit { +@@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { + String expectedTestBitInstruction = + "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : + "aarch64".equals(System.getProperty("os.arch")) ? "tb" : +- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; ++ "amd64".equals(System.getProperty("os.arch")) ? "test" : ++ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; + + if (expectedTestBitInstruction != null) { + output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +index 558b4218f0b..55374b116e6 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +index 3ed72bf0a99..8fb82ee4531 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +index c05cf309dae..aca32137eda 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +index 58ce5366bae..8deac4f7895 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -41,6 +41,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; + +@@ -53,6 +54,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA_OPTION), + new UseSHASpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae67..26635002040 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -32,26 +32,27 @@ + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, RISCV64, PPC, S390x, SPARC and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. + super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, ++ new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM should start with " + + "option '%s' without any warnings", optionName); +- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of + // SHA-related options will not cause any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, + new String[] { ".*" + optionName + ".*" }, shouldPassMessage, +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +new file mode 100644 +index 00000000000..2ecfec07a4c +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package compiler.intrinsics.sha.cli.testcases; ++ ++import compiler.intrinsics.sha.cli.DigestOptionsBase; ++import jdk.test.lib.process.ExitCode; ++import jdk.test.lib.Platform; ++import jdk.test.lib.cli.CommandLineOptionTest; ++import jdk.test.lib.cli.predicate.AndPredicate; ++import jdk.test.lib.cli.predicate.NotPredicate; ++ ++/** ++ * Generic test case for SHA-related options targeted to RISCV64 CPUs ++ * which don't support instruction required by the tested option. ++ */ ++public class GenericTestCaseForUnsupportedRISCV64CPU extends ++ DigestOptionsBase.TestCase { ++ ++ final private boolean checkUseSHA; ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); ++ } ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { ++ super(optionName, new AndPredicate(Platform::isRISCV64, ++ new NotPredicate(DigestOptionsBase.getPredicateForOption( ++ optionName)))); ++ ++ this.checkUseSHA = checkUseSHA; ++ } ++ ++ @Override ++ protected void verifyWarnings() throws Throwable { ++ String shouldPassMessage = String.format("JVM startup should pass with" ++ + "option '-XX:-%s' without any warnings", optionName); ++ //Verify that option could be disabled without any warnings. 
++ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ++ DigestOptionsBase.getWarningForUnsupportedCPU(optionName) ++ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); ++ ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ DigestOptionsBase.USE_SHA_OPTION, optionName); ++ ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } ++ } ++ } ++ ++ @Override ++ protected void verifyOptionValues() throws Throwable { ++ // Verify that option is disabled by default. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be disabled by default", ++ optionName), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ DigestOptionsBase.USE_SHA_OPTION, true)), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ DigestOptionsBase.USE_SHA_OPTION, true)); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +index 2e3e2717a65..7be8af6d035 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +index 0e06a9e4327..797927b42bf 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +index c3cdbf37464..be8f7d586c2 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +index d33bd411f16..d96d5e29c00 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions + * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +index 992fa4b5161..b09c873d05d 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +index 3e79b3528b7..fe40ed6f98d 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +index 6603dd224ef..51631910493 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8135028 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +index d9a0c988004..d999ae423cf 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +index 722db95aed3..65912a5c7fa 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +index f58f21feb23..fffdc2f7565 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5f..7afe3560f30 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -61,15 +61,17 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -79,10 +81,11 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), + new OrPredicate(new 
CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -92,7 +95,7 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE + = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, +diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +index 57256aa5a32..d4d43b01ae6 100644 +--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java ++++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { + // It's ok for ARM not to have symbols, because it does not support NMT detail + // when targeting thumb2. It's also ok for Windows not to have symbols, because + // they are only available if the symbols file is included with the build. +- if (Platform.isWindows() || Platform.isARM()) { ++ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { + return; // we are done + } + output.reportDiagnosticSummary(); +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd9..eab19273ad8 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -239,7 +239,7 @@ private static boolean isAlwaysSupportedPlatform() { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +index 54640b245f8..f0b7aed5ceb 100644 +--- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java ++++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +@@ -1,5 +1,4 @@ + /* +- * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 77458554b76..d4bfe31dd7a 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f6..abeff80e5e8 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + } + } + } +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index f4ee0546c70..635cdd18054 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -202,6 +202,10 @@ public static boolean isARM() { + return isArch("arm.*"); + } + ++ public static boolean isRISCV64() { ++ return isArch("riscv64"); ++ } ++ + public static boolean isPPC() { + return isArch("ppc.*"); + } + +From c51e546566c937354842a27696bd2221087101ae Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 28 Mar 2023 16:30:04 +0800 +Subject: [PATCH 002/140] Drop zgc part + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- + .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 ------------------ + .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 ---- + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 --------- + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 -- + src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 --------- + .../cpu/riscv/macroAssembler_riscv.cpp | 46 -- + .../cpu/riscv/macroAssembler_riscv.hpp | 9 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 10 - + 9 files changed, 1 insertion(+), 1093 deletions(-) + delete mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 742c2126e60..bba3bd4709c 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -871,11 +871,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } +- +- if (!UseZGC) { +- // Load barrier has not yet been applied, so ZGC can't verify the oop here +- __ verify_oop(dest->as_register()); +- } ++ __ verify_oop(dest->as_register()); + } + } + +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +deleted file mode 100644 +index 3d3f4d4d774..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp ++++ /dev/null +@@ -1,441 +0,0 @@ +-/* +- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/macroAssembler.inline.hpp" +-#include "code/codeBlob.hpp" +-#include "code/vmreg.inline.hpp" +-#include "gc/z/zBarrier.inline.hpp" +-#include "gc/z/zBarrierSet.hpp" +-#include "gc/z/zBarrierSetAssembler.hpp" +-#include "gc/z/zBarrierSetRuntime.hpp" +-#include "gc/z/zThreadLocalData.hpp" +-#include "memory/resourceArea.hpp" +-#include "runtime/sharedRuntime.hpp" +-#include "utilities/macros.hpp" +-#ifdef COMPILER1 +-#include "c1/c1_LIRAssembler.hpp" +-#include "c1/c1_MacroAssembler.hpp" +-#include "gc/z/c1/zBarrierSetC1.hpp" +-#endif // COMPILER1 +-#ifdef COMPILER2 +-#include "gc/z/c2/zBarrierSetC2.hpp" +-#endif // COMPILER2 +- +-#ifdef PRODUCT +-#define BLOCK_COMMENT(str) /* nothing */ +-#else +-#define BLOCK_COMMENT(str) __ block_comment(str) +-#endif +- +-#undef __ +-#define __ masm-> +- +-void ZBarrierSetAssembler::load_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Register dst, +- Address src, +- Register tmp1, +- Register tmp_thread) { +- if (!ZBarrierSet::barrier_needed(decorators, type)) { +- // Barrier not needed +- BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); +- return; +- } +- +- assert_different_registers(t1, src.base()); +- assert_different_registers(t0, t1, dst); +- +- Label done; +- +- // Load bad mask into temp register. +- __ la(t0, src); +- __ ld(t1, address_bad_mask_from_thread(xthread)); +- __ ld(dst, Address(t0)); +- +- // Test reference against bad mask. If mask bad, then we need to fix it up. +- __ andr(t1, dst, t1); +- __ beqz(t1, done); +- +- __ enter(); +- +- __ push_call_clobbered_registers_except(RegSet::of(dst)); +- +- if (c_rarg0 != dst) { +- __ mv(c_rarg0, dst); +- } +- +- __ mv(c_rarg1, t0); +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); +- +- // Make sure dst has the return value. +- if (dst != x10) { +- __ mv(dst, x10); +- } +- +- __ pop_call_clobbered_registers_except(RegSet::of(dst)); +- __ leave(); +- +- __ bind(done); +-} +- +-#ifdef ASSERT +- +-void ZBarrierSetAssembler::store_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Address dst, +- Register val, +- Register tmp1, +- Register tmp2) { +- // Verify value +- if (is_reference_type(type)) { +- // Note that src could be noreg, which means we +- // are storing null and can skip verification. +- if (val != noreg) { +- Label done; +- +- // tmp1 and tmp2 are often set to noreg. +- RegSet savedRegs = RegSet::of(t0); +- __ push_reg(savedRegs, sp); +- +- __ ld(t0, address_bad_mask_from_thread(xthread)); +- __ andr(t0, val, t0); +- __ beqz(t0, done); +- __ stop("Verify oop store failed"); +- __ should_not_reach_here(); +- __ bind(done); +- __ pop_reg(savedRegs, sp); +- } +- } +- +- // Store value +- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); +-} +- +-#endif // ASSERT +- +-void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, +- DecoratorSet decorators, +- bool is_oop, +- Register src, +- Register dst, +- Register count, +- RegSet saved_regs) { +- if (!is_oop) { +- // Barrier not needed +- return; +- } +- +- BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); +- +- assert_different_registers(src, count, t0); +- +- __ push_reg(saved_regs, sp); +- +- if (count == c_rarg0 && src == c_rarg1) { +- // exactly backwards!! 
+- __ xorr(c_rarg0, c_rarg0, c_rarg1); +- __ xorr(c_rarg1, c_rarg0, c_rarg1); +- __ xorr(c_rarg0, c_rarg0, c_rarg1); +- } else { +- __ mv(c_rarg0, src); +- __ mv(c_rarg1, count); +- } +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); +- +- __ pop_reg(saved_regs, sp); +- +- BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +-} +- +-void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, +- Register jni_env, +- Register robj, +- Register tmp, +- Label& slowpath) { +- BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); +- +- assert_different_registers(jni_env, robj, tmp); +- +- // Resolve jobject +- BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); +- +- // Compute the offset of address bad mask from the field of jni_environment +- long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - +- in_bytes(JavaThread::jni_environment_offset())); +- +- // Load the address bad mask +- __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); +- +- // Check address bad mask +- __ andr(tmp, robj, tmp); +- __ bnez(tmp, slowpath); +- +- BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +-} +- +-#ifdef COMPILER2 +- +-OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { +- if (!OptoReg::is_reg(opto_reg)) { +- return OptoReg::Bad; +- } +- +- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); +- if (vm_reg->is_FloatRegister()) { +- return opto_reg & ~1; +- } +- +- return opto_reg; +-} +- +-#undef __ +-#define __ _masm-> +- +-class ZSaveLiveRegisters { +-private: +- MacroAssembler* const _masm; +- RegSet _gp_regs; +- FloatRegSet _fp_regs; +- VectorRegSet _vp_regs; +- +-public: +- void initialize(ZLoadBarrierStubC2* stub) { +- // Record registers that needs to be saved/restored +- RegMaskIterator rmi(stub->live()); +- while (rmi.has_next()) { +- const OptoReg::Name opto_reg = rmi.next(); +- if (OptoReg::is_reg(opto_reg)) { +- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); +- if (vm_reg->is_Register()) { +- _gp_regs += RegSet::of(vm_reg->as_Register()); +- } else if (vm_reg->is_FloatRegister()) { +- _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); +- } else if (vm_reg->is_VectorRegister()) { +- const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); +- _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); +- } else { +- fatal("Unknown register type"); +- } +- } +- } +- +- // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated +- _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); +- } +- +- ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : +- _masm(masm), +- _gp_regs(), +- _fp_regs(), +- _vp_regs() { +- // Figure out what registers to save/restore +- initialize(stub); +- +- // Save registers +- __ push_reg(_gp_regs, sp); +- __ push_fp(_fp_regs, sp); +- __ push_vp(_vp_regs, sp); +- } +- +- ~ZSaveLiveRegisters() { +- // Restore registers +- __ pop_vp(_vp_regs, sp); +- __ pop_fp(_fp_regs, sp); +- __ pop_reg(_gp_regs, sp); +- } +-}; +- +-class ZSetupArguments { +-private: +- MacroAssembler* const _masm; +- const Register _ref; +- const Address _ref_addr; +- +-public: +- ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : +- _masm(masm), +- _ref(stub->ref()), +- _ref_addr(stub->ref_addr()) { +- +- // Setup 
arguments +- if (_ref_addr.base() == noreg) { +- // No self healing +- if (_ref != c_rarg0) { +- __ mv(c_rarg0, _ref); +- } +- __ mv(c_rarg1, zr); +- } else { +- // Self healing +- if (_ref == c_rarg0) { +- // _ref is already at correct place +- __ la(c_rarg1, _ref_addr); +- } else if (_ref != c_rarg1) { +- // _ref is in wrong place, but not in c_rarg1, so fix it first +- __ la(c_rarg1, _ref_addr); +- __ mv(c_rarg0, _ref); +- } else if (_ref_addr.base() != c_rarg0) { +- assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); +- __ mv(c_rarg0, _ref); +- __ la(c_rarg1, _ref_addr); +- } else { +- assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); +- if (_ref_addr.base() == c_rarg0) { +- __ mv(t1, c_rarg1); +- __ la(c_rarg1, _ref_addr); +- __ mv(c_rarg0, t1); +- } else { +- ShouldNotReachHere(); +- } +- } +- } +- } +- +- ~ZSetupArguments() { +- // Transfer result +- if (_ref != x10) { +- __ mv(_ref, x10); +- } +- } +-}; +- +-#undef __ +-#define __ masm-> +- +-void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { +- BLOCK_COMMENT("ZLoadBarrierStubC2"); +- +- // Stub entry +- __ bind(*stub->entry()); +- +- { +- ZSaveLiveRegisters save_live_registers(masm, stub); +- ZSetupArguments setup_arguments(masm, stub); +- int32_t offset = 0; +- __ la_patchable(t0, stub->slow_path(), offset); +- __ jalr(x1, t0, offset); +- } +- +- // Stub exit +- __ j(*stub->continuation()); +-} +- +-#undef __ +- +-#endif // COMPILER2 +- +-#ifdef COMPILER1 +-#undef __ +-#define __ ce->masm()-> +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { +- assert_different_registers(xthread, ref->as_register(), t1); +- __ ld(t1, address_bad_mask_from_thread(xthread)); +- __ andr(t1, t1, ref->as_register()); +-} +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, +- ZLoadBarrierStubC1* stub) const { +- // Stub entry +- __ bind(*stub->entry()); +- +- Register ref = stub->ref()->as_register(); +- Register ref_addr = noreg; +- Register tmp = noreg; +- +- if (stub->tmp()->is_valid()) { +- // Load address into tmp register +- ce->leal(stub->ref_addr(), stub->tmp()); +- ref_addr = tmp = stub->tmp()->as_pointer_register(); +- } else { +- // Address already in register +- ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); +- } +- +- assert_different_registers(ref, ref_addr, noreg); +- +- // Save x10 unless it is the result or tmp register +- // Set up SP to accomodate parameters and maybe x10. 
+- if (ref != x10 && tmp != x10) { +- __ sub(sp, sp, 32); +- __ sd(x10, Address(sp, 16)); +- } else { +- __ sub(sp, sp, 16); +- } +- +- // Setup arguments and call runtime stub +- ce->store_parameter(ref_addr, 1); +- ce->store_parameter(ref, 0); +- +- __ far_call(stub->runtime_stub()); +- +- // Verify result +- __ verify_oop(x10, "Bad oop"); +- +- +- // Move result into place +- if (ref != x10) { +- __ mv(ref, x10); +- } +- +- // Restore x10 unless it is the result or tmp register +- if (ref != x10 && tmp != x10) { +- __ ld(x10, Address(sp, 16)); +- __ add(sp, sp, 32); +- } else { +- __ add(sp, sp, 16); +- } +- +- // Stub exit +- __ j(*stub->continuation()); +-} +- +-#undef __ +-#define __ sasm-> +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) const { +- __ prologue("zgc_load_barrier stub", false); +- +- __ push_call_clobbered_registers_except(RegSet::of(x10)); +- +- // Setup arguments +- __ load_parameter(0, c_rarg0); +- __ load_parameter(1, c_rarg1); +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); +- +- __ pop_call_clobbered_registers_except(RegSet::of(x10)); +- +- __ epilogue(); +-} +- +-#undef __ +-#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +deleted file mode 100644 +index dc07ab635fe..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp ++++ /dev/null +@@ -1,101 +0,0 @@ +-/* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +-#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +- +-#include "code/vmreg.hpp" +-#include "oops/accessDecorators.hpp" +-#ifdef COMPILER2 +-#include "opto/optoreg.hpp" +-#endif // COMPILER2 +- +-#ifdef COMPILER1 +-class LIR_Assembler; +-class LIR_Opr; +-class StubAssembler; +-class ZLoadBarrierStubC1; +-#endif // COMPILER1 +- +-#ifdef COMPILER2 +-class Node; +-class ZLoadBarrierStubC2; +-#endif // COMPILER2 +- +-class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +-public: +- virtual void load_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Register dst, +- Address src, +- Register tmp1, +- Register tmp_thread); +- +-#ifdef ASSERT +- virtual void store_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Address dst, +- Register val, +- Register tmp1, +- Register tmp2); +-#endif // ASSERT +- +- virtual void arraycopy_prologue(MacroAssembler* masm, +- DecoratorSet decorators, +- bool is_oop, +- Register src, +- Register dst, +- Register count, +- RegSet saved_regs); +- +- virtual void try_resolve_jobject_in_native(MacroAssembler* masm, +- Register jni_env, +- Register robj, +- Register tmp, +- Label& slowpath); +- +-#ifdef COMPILER1 +- void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; +- +- void generate_c1_load_barrier_stub(LIR_Assembler* ce, +- ZLoadBarrierStubC1* stub) const; +- +- void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) const; +-#endif // COMPILER1 +- +-#ifdef COMPILER2 +- OptoReg::Name refine_register(const Node* node, +- OptoReg::Name opto_reg); +- +- void generate_c2_load_barrier_stub(MacroAssembler* masm, +- ZLoadBarrierStubC2* stub) const; +-#endif // COMPILER2 +-}; +- +-#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +deleted file mode 100644 +index d14997790af..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp ++++ /dev/null +@@ -1,212 +0,0 @@ +-/* +- * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "gc/shared/gcLogPrecious.hpp" +-#include "gc/shared/gc_globals.hpp" +-#include "gc/z/zGlobals.hpp" +-#include "runtime/globals.hpp" +-#include "runtime/os.hpp" +-#include "utilities/globalDefinitions.hpp" +-#include "utilities/powerOfTwo.hpp" +- +-#ifdef LINUX +-#include +-#endif // LINUX +- +-// +-// The heap can have three different layouts, depending on the max heap size. +-// +-// Address Space & Pointer Layout 1 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000014000000000 (20TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000010000000000 (16TB) +-// . . +-// +--------------------------------+ 0x00000c0000000000 (12TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000080000000000 (8TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000040000000000 (4TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 6 5 2 1 0 +-// +--------------------+----+-----------------------------------------------+ +-// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| +-// +--------------------+----+-----------------------------------------------+ +-// | | | +-// | | * 41-0 Object Offset (42-bits, 4TB address space) +-// | | +-// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) +-// | 0010 = Marked1 (Address view 8-12TB) +-// | 0100 = Remapped (Address view 16-20TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-46 Fixed (18-bits, always zero) +-// +-// +-// Address Space & Pointer Layout 2 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000280000000000 (40TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000200000000000 (32TB) +-// . . +-// +--------------------------------+ 0x0000180000000000 (24TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000100000000000 (16TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000080000000000 (8TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 7 6 3 2 0 +-// +------------------+-----+------------------------------------------------+ +-// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| +-// +-------------------+----+------------------------------------------------+ +-// | | | +-// | | * 42-0 Object Offset (43-bits, 8TB address space) +-// | | +-// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) +-// | 0010 = Marked1 (Address view 16-24TB) +-// | 0100 = Remapped (Address view 32-40TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-47 Fixed (17-bits, always zero) +-// +-// +-// Address Space & Pointer Layout 3 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000500000000000 (80TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000400000000000 (64TB) +-// . . 
+-// +--------------------------------+ 0x0000300000000000 (48TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000200000000000 (32TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000100000000000 (16TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 8 7 4 3 0 +-// +------------------+----+-------------------------------------------------+ +-// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| +-// +------------------+----+-------------------------------------------------+ +-// | | | +-// | | * 43-0 Object Offset (44-bits, 16TB address space) +-// | | +-// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) +-// | 0010 = Marked1 (Address view 32-48TB) +-// | 0100 = Remapped (Address view 64-80TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-48 Fixed (16-bits, always zero) +-// +- +-// Default value if probing is not implemented for a certain platform: 128TB +-static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; +-// Minimum value returned, if probing fails: 64GB +-static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; +- +-static size_t probe_valid_max_address_bit() { +-#ifdef LINUX +- size_t max_address_bit = 0; +- const size_t page_size = os::vm_page_size(); +- for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { +- const uintptr_t base_addr = ((uintptr_t) 1U) << i; +- if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { +- // msync suceeded, the address is valid, and maybe even already mapped. +- max_address_bit = i; +- break; +- } +- if (errno != ENOMEM) { +- // Some error occured. This should never happen, but msync +- // has some undefined behavior, hence ignore this bit. +-#ifdef ASSERT +- fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +-#else // ASSERT +- log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +-#endif // ASSERT +- continue; +- } +- // Since msync failed with ENOMEM, the page might not be mapped. +- // Try to map it, to see if the address is valid. 
+- void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); +- if (result_addr != MAP_FAILED) { +- munmap(result_addr, page_size); +- } +- if ((uintptr_t) result_addr == base_addr) { +- // address is valid +- max_address_bit = i; +- break; +- } +- } +- if (max_address_bit == 0) { +- // probing failed, allocate a very high page and take that bit as the maximum +- const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; +- void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); +- if (result_addr != MAP_FAILED) { +- max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; +- munmap(result_addr, page_size); +- } +- } +- log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); +- return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); +-#else // LINUX +- return DEFAULT_MAX_ADDRESS_BIT; +-#endif // LINUX +-} +- +-size_t ZPlatformAddressOffsetBits() { +- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; +- const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; +- const size_t min_address_offset_bits = max_address_offset_bits - 2; +- const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); +- const size_t address_offset_bits = log2i_exact(address_offset); +- return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +-} +- +-size_t ZPlatformAddressMetadataShift() { +- return ZPlatformAddressOffsetBits(); +-} +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +deleted file mode 100644 +index f20ecd9b073..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp ++++ /dev/null +@@ -1,36 +0,0 @@ +-/* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +-#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +- +-const size_t ZPlatformGranuleSizeShift = 21; // 2MB +-const size_t ZPlatformHeapViews = 3; +-const size_t ZPlatformCacheLineSize = 64; +- +-size_t ZPlatformAddressOffsetBits(); +-size_t ZPlatformAddressMetadataShift(); +- +-#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +deleted file mode 100644 +index 6b6f87814a5..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad ++++ /dev/null +@@ -1,233 +0,0 @@ +-// +-// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. +-// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +-// +-// This code is free software; you can redistribute it and/or modify it +-// under the terms of the GNU General Public License version 2 only, as +-// published by the Free Software Foundation. +-// +-// This code is distributed in the hope that it will be useful, but WITHOUT +-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-// version 2 for more details (a copy is included in the LICENSE file that +-// accompanied this code). +-// +-// You should have received a copy of the GNU General Public License version +-// 2 along with this work; if not, write to the Free Software Foundation, +-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +-// +-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +-// or visit www.oracle.com if you need additional information or have any +-// questions. 
+-// +- +-source_hpp %{ +- +-#include "gc/shared/gc_globals.hpp" +-#include "gc/z/c2/zBarrierSetC2.hpp" +-#include "gc/z/zThreadLocalData.hpp" +- +-%} +- +-source %{ +- +-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { +- if (barrier_data == ZLoadBarrierElided) { +- return; +- } +- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); +- __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(tmp, tmp, ref); +- __ bnez(tmp, *stub->entry(), true /* far */); +- __ bind(*stub->continuation()); +-} +- +-static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { +- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); +- __ j(*stub->entry()); +- __ bind(*stub->continuation()); +-} +- +-%} +- +-// Load Pointer +-instruct zLoadP(iRegPNoSp dst, memory mem) +-%{ +- match(Set dst (LoadP mem)); +- predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); +- effect(TEMP dst); +- +- ins_cost(4 * DEFAULT_COST); +- +- format %{ "ld $dst, $mem, #@zLoadP" %} +- +- ins_encode %{ +- const Address ref_addr (as_Register($mem$$base), $mem$$disp); +- __ ld($dst$$Register, ref_addr); +- z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); +- %} +- +- ins_pipe(iload_reg_mem); +-%} +- +-instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ +- match(Set res (CompareAndSwapP mem (Binary oldval newval))); +- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(KILL cr, TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" +- "mv $res, $res == $oldval" %} +- +- ins_encode %{ +- Label failed; +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ beqz($res$$Register, failed); +- __ mv(t0, $oldval$$Register); +- __ bind(failed); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); +- __ andr(t1, t1, t0); +- __ beqz(t1, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ +- match(Set res (CompareAndSwapP mem (Binary oldval newval))); +- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); +- effect(KILL cr, TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" +- "mv $res, $res == $oldval" %} +- +- ins_encode %{ +- 
Label failed; +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ beqz($res$$Register, failed); +- __ mv(t0, $oldval$$Register); +- __ bind(failed); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); +- __ andr(t1, t1, t0); +- __ beqz(t1, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} +- +- ins_encode %{ +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(t0, t0, $res$$Register); +- __ beqz(t0, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} +- +- ins_encode %{ +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(t0, t0, $res$$Register); +- __ beqz(t0, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ +- match(Set prev (GetAndSetP mem newv)); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); +- effect(TEMP_DEF prev, KILL cr); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} +- +- ins_encode %{ +- __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); +- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); +- %} +- +- ins_pipe(pipe_serial); +-%} +- +-instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ +- match(Set prev (GetAndSetP mem newv)); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); +- effect(TEMP_DEF prev, KILL cr); +- +- ins_cost(VOLATILE_REF_COST); +- +- format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} +- +- ins_encode %{ +- __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); +- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); +- %} +- ins_pipe(pipe_serial); +-%} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 86710295444..9d2cc4cf89f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1046,52 +1046,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + return count; + } + +-#ifdef COMPILER2 +-int MacroAssembler::push_vp(unsigned int bitset, Register stack) { +- CompressibleRegion cr(this); +- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- +- // Scan bitset to accumulate register pairs +- unsigned char regs[32]; +- int count = 0; +- for (int reg = 31; reg >= 0; reg--) { +- if ((1U << 31) & bitset) { +- regs[count++] = reg; +- } +- bitset <<= 1; +- } +- +- for (int i = 0; i < count; i++) { +- sub(stack, stack, vector_size_in_bytes); +- vs1r_v(as_VectorRegister(regs[i]), stack); +- } +- +- return count * vector_size_in_bytes / wordSize; +-} +- +-int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { +- CompressibleRegion cr(this); +- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- +- // Scan bitset to accumulate register pairs +- unsigned char regs[32]; +- int count = 0; +- for (int reg = 31; reg >= 0; reg--) { +- if ((1U << 31) & bitset) { +- regs[count++] = reg; +- } +- bitset <<= 1; +- } +- +- for (int i = count - 1; i >= 0; i--) { +- vl1r_v(as_VectorRegister(regs[i]), stack); +- add(stack, stack, vector_size_in_bytes); +- } +- +- return count * vector_size_in_bytes / wordSize; +-} +-#endif // COMPILER2 +- + void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); + // Push integer registers x7, x10-x17, x28-x31. 
+diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 23e09475be1..b2f0455a1f1 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -484,12 +484,6 @@ class MacroAssembler: public Assembler { + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); +- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } +- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } +-#ifdef COMPILER2 +- void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } +- void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } +-#endif // COMPILER2 + + // Push and pop everything that might be clobbered by a native + // runtime call except t0 and t1. (They are always +@@ -783,9 +777,6 @@ class MacroAssembler: public Assembler { + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + +- int push_vp(unsigned int bitset, Register stack); +- int pop_vp(unsigned int bitset, Register stack); +- + // vext + void vmnot_m(VectorRegister vd, VectorRegister vs); + void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index b3fdd04db1b..b05edf7172c 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -546,16 +546,6 @@ class StubGenerator: public StubCodeGenerator { + // make sure object is 'reasonable' + __ beqz(x10, exit); // if obj is NULL it is OK + +-#if INCLUDE_ZGC +- if (UseZGC) { +- // Check if mask is good. 
+- // verifies that ZAddressBadMask & x10 == 0 +- __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(c_rarg2, x10, c_rarg3); +- __ bnez(c_rarg2, error); +- } +-#endif +- + // Check if the oop is in the right area of memory + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, x10, c_rarg3); + +From 7772140df96747b42b13007d0827fc21d2a8b926 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 27 Mar 2023 15:43:39 +0800 +Subject: [PATCH 003/140] Drop the C2 Vector part + +--- + make/hotspot/gensrc/GensrcAdlc.gmk | 1 - + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 325 --- + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 52 - + src/hotspot/cpu/riscv/globals_riscv.hpp | 8 +- + .../cpu/riscv/macroAssembler_riscv.cpp | 22 +- + .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/matcher_riscv.hpp | 44 +- + src/hotspot/cpu/riscv/register_riscv.cpp | 5 - + src/hotspot/cpu/riscv/register_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/riscv.ad | 476 +--- + src/hotspot/cpu/riscv/riscv_v.ad | 2065 ----------------- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 61 +- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 110 - + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 4 - + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 10 +- + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 17 +- + 16 files changed, 41 insertions(+), 3167 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/riscv_v.ad + +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index 67f4c6f0574..51137b99db2 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -152,7 +152,6 @@ ifeq ($(call check-jvm-feature, compiler2), true) + + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ +- $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +index 27770dc17aa..73f84a724ca 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -1319,328 +1319,3 @@ void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRe + + bind(Done); + } +- +-void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, +- VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { +- Label loop; +- Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; +- +- bind(loop); +- vsetvli(tmp1, cnt, sew, Assembler::m2); +- vlex_v(vr1, a1, sew); +- vlex_v(vr2, a2, sew); +- vmsne_vv(vrs, vr1, vr2); +- vfirst_m(tmp2, vrs); +- bgez(tmp2, DONE); +- sub(cnt, cnt, tmp1); +- if (!islatin) { +- slli(tmp1, tmp1, 1); // get byte counts +- } +- add(a1, a1, tmp1); +- add(a2, a2, tmp1); +- bnez(cnt, loop); +- +- mv(result, true); +-} +- +-void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { +- Label DONE; +- Register tmp1 = t0; +- Register tmp2 = t1; +- +- BLOCK_COMMENT("string_equals_v {"); +- +- mv(result, false); +- +- if (elem_size == 2) { +- srli(cnt, cnt, 1); +- } +- +- element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); +- +- bind(DONE); +- BLOCK_COMMENT("} string_equals_v"); +-} +- +-// used by C2 ClearArray patterns. +-// base: Address of a buffer to be zeroed +-// cnt: Count in HeapWords +-// +-// base, cnt, v0, v1 and t0 are clobbered. +-void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { +- Label loop; +- +- // making zero words +- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); +- vxor_vv(v0, v0, v0); +- +- bind(loop); +- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); +- vse64_v(v0, base); +- sub(cnt, cnt, t0); +- shadd(base, t0, base, t0, 3); +- bnez(cnt, loop); +-} +- +-void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, +- Register cnt1, int elem_size) { +- Label DONE; +- Register tmp1 = t0; +- Register tmp2 = t1; +- Register cnt2 = tmp2; +- int length_offset = arrayOopDesc::length_offset_in_bytes(); +- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); +- +- BLOCK_COMMENT("arrays_equals_v {"); +- +- // if (a1 == a2), return true +- mv(result, true); +- beq(a1, a2, DONE); +- +- mv(result, false); +- // if a1 == null or a2 == null, return false +- beqz(a1, DONE); +- beqz(a2, DONE); +- // if (a1.length != a2.length), return false +- lwu(cnt1, Address(a1, length_offset)); +- lwu(cnt2, Address(a2, length_offset)); +- bne(cnt1, cnt2, DONE); +- +- la(a1, Address(a1, base_offset)); +- la(a2, Address(a2, base_offset)); +- +- element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); +- +- bind(DONE); +- +- BLOCK_COMMENT("} arrays_equals_v"); +-} +- +-void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, +- Register result, Register tmp1, Register tmp2, int encForm) { +- Label DIFFERENCE, DONE, L, loop; +- bool encLL = encForm == StrIntrinsicNode::LL; +- bool encLU = encForm == StrIntrinsicNode::LU; +- bool encUL = encForm == StrIntrinsicNode::UL; +- +- bool str1_isL = encLL || encLU; +- bool str2_isL = encLL || encUL; +- +- int minCharsInWord = encLL ? wordSize : wordSize / 2; +- +- BLOCK_COMMENT("string_compare {"); +- +- // for Lating strings, 1 byte for 1 character +- // for UTF16 strings, 2 bytes for 1 character +- if (!str1_isL) +- sraiw(cnt1, cnt1, 1); +- if (!str2_isL) +- sraiw(cnt2, cnt2, 1); +- +- // if str1 == str2, return the difference +- // save the minimum of the string lengths in cnt2. +- sub(result, cnt1, cnt2); +- bgt(cnt1, cnt2, L); +- mv(cnt2, cnt1); +- bind(L); +- +- if (str1_isL == str2_isL) { // LL or UU +- element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); +- j(DONE); +- } else { // LU or UL +- Register strL = encLU ? str1 : str2; +- Register strU = encLU ? str2 : str1; +- VectorRegister vstr1 = encLU ? 
v4 : v0; +- VectorRegister vstr2 = encLU ? v0 : v4; +- +- bind(loop); +- vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); +- vle8_v(vstr1, strL); +- vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); +- vzext_vf2(vstr2, vstr1); +- vle16_v(vstr1, strU); +- vmsne_vv(v0, vstr2, vstr1); +- vfirst_m(tmp2, v0); +- bgez(tmp2, DIFFERENCE); +- sub(cnt2, cnt2, tmp1); +- add(strL, strL, tmp1); +- shadd(strU, tmp1, strU, tmp1, 1); +- bnez(cnt2, loop); +- j(DONE); +- } +- bind(DIFFERENCE); +- slli(tmp1, tmp2, 1); +- add(str1, str1, str1_isL ? tmp2 : tmp1); +- add(str2, str2, str2_isL ? tmp2 : tmp1); +- str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); +- str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); +- sub(result, tmp1, tmp2); +- +- bind(DONE); +-} +- +-void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { +- Label loop; +- assert_different_registers(src, dst, len, tmp, t0); +- +- BLOCK_COMMENT("byte_array_inflate_v {"); +- bind(loop); +- vsetvli(tmp, len, Assembler::e8, Assembler::m2); +- vle8_v(v2, src); +- vsetvli(t0, len, Assembler::e16, Assembler::m4); +- vzext_vf2(v0, v2); +- vse16_v(v0, dst); +- sub(len, len, tmp); +- add(src, src, tmp); +- shadd(dst, tmp, dst, tmp, 1); +- bnez(len, loop); +- BLOCK_COMMENT("} byte_array_inflate_v"); +-} +- +-// Compress char[] array to byte[]. +-// result: the array length if every element in array can be encoded; 0, otherwise. +-void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { +- Label done; +- encode_iso_array_v(src, dst, len, result, tmp); +- beqz(len, done); +- mv(result, zr); +- bind(done); +-} +- +-// result: the number of elements had been encoded. +-void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { +- Label loop, DIFFERENCE, DONE; +- +- BLOCK_COMMENT("encode_iso_array_v {"); +- mv(result, 0); +- +- bind(loop); +- mv(tmp, 0xff); +- vsetvli(t0, len, Assembler::e16, Assembler::m2); +- vle16_v(v2, src); +- // if element > 0xff, stop +- vmsgtu_vx(v1, v2, tmp); +- vfirst_m(tmp, v1); +- vmsbf_m(v0, v1); +- // compress char to byte +- vsetvli(t0, len, Assembler::e8); +- vncvt_x_x_w(v1, v2, Assembler::v0_t); +- vse8_v(v1, dst, Assembler::v0_t); +- +- bgez(tmp, DIFFERENCE); +- add(result, result, t0); +- add(dst, dst, t0); +- sub(len, len, t0); +- shadd(src, t0, src, t0, 1); +- bnez(len, loop); +- j(DONE); +- +- bind(DIFFERENCE); +- add(result, result, tmp); +- +- bind(DONE); +- BLOCK_COMMENT("} encode_iso_array_v"); +-} +- +-void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { +- Label LOOP, SET_RESULT, DONE; +- +- BLOCK_COMMENT("count_positives_v {"); +- mv(result, zr); +- +- bind(LOOP); +- vsetvli(t0, len, Assembler::e8, Assembler::m4); +- vle8_v(v0, ary); +- vmslt_vx(v0, v0, zr); +- vfirst_m(tmp, v0); +- bgez(tmp, SET_RESULT); +- // if tmp == -1, all bytes are positive +- add(result, result, t0); +- +- sub(len, len, t0); +- add(ary, ary, t0); +- bnez(len, LOOP); +- j(DONE); +- +- // add remaining positive bytes count +- bind(SET_RESULT); +- add(result, result, tmp); +- +- bind(DONE); +- BLOCK_COMMENT("} count_positives_v"); +-} +- +-void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- bool isL) { +- mv(result, zr); +- +- Label loop, MATCH, DONE; +- Assembler::SEW sew = isL ? 
Assembler::e8 : Assembler::e16; +- bind(loop); +- vsetvli(tmp1, cnt1, sew, Assembler::m4); +- vlex_v(v0, str1, sew); +- vmseq_vx(v0, v0, ch); +- vfirst_m(tmp2, v0); +- bgez(tmp2, MATCH); // if equal, return index +- +- add(result, result, tmp1); +- sub(cnt1, cnt1, tmp1); +- if (!isL) slli(tmp1, tmp1, 1); +- add(str1, str1, tmp1); +- bnez(cnt1, loop); +- +- mv(result, -1); +- j(DONE); +- +- bind(MATCH); +- add(result, result, tmp2); +- +- bind(DONE); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, +- bool is_double, bool is_min) { +- assert_different_registers(dst, src1, src2); +- +- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); +- +- is_min ? vfmin_vv(dst, src1, src2) +- : vfmax_vv(dst, src1, src2); +- +- vmfne_vv(v0, src1, src1); +- vfadd_vv(dst, src1, src1, Assembler::v0_t); +- vmfne_vv(v0, src2, src2); +- vfadd_vv(dst, src2, src2, Assembler::v0_t); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, +- FloatRegister src1, VectorRegister src2, +- VectorRegister tmp1, VectorRegister tmp2, +- bool is_double, bool is_min) { +- assert_different_registers(src2, tmp1, tmp2); +- +- Label L_done, L_NaN; +- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); +- vfmv_s_f(tmp2, src1); +- +- is_min ? vfredmin_vs(tmp1, src2, tmp2) +- : vfredmax_vs(tmp1, src2, tmp2); +- +- fsflags(zr); +- // Checking NaNs +- vmflt_vf(tmp2, src2, src1); +- frflags(t0); +- bnez(t0, L_NaN); +- j(L_done); +- +- bind(L_NaN); +- vfmv_s_f(tmp2, src1); +- vfredsum_vs(tmp1, src2, tmp2); +- +- bind(L_done); +- vfmv_f_s(dst, tmp1); +-} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +index c71df4c101b..90b6554af02 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -28,13 +28,6 @@ + + // C2_MacroAssembler contains high-level macros for C2 + +- private: +- void element_compare(Register r1, Register r2, +- Register result, Register cnt, +- Register tmp1, Register tmp2, +- VectorRegister vr1, VectorRegister vr2, +- VectorRegister vrs, +- bool is_latin, Label& DONE); + public: + + void string_compare(Register str1, Register str2, +@@ -145,49 +138,4 @@ + FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min); + +- // intrinsic methods implemented by rvv instructions +- void string_equals_v(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- void arrays_equals_v(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- void string_compare_v(Register str1, Register str2, +- Register cnt1, Register cnt2, +- Register result, +- Register tmp1, Register tmp2, +- int encForm); +- +- void clear_array_v(Register base, Register cnt); +- +- void byte_array_inflate_v(Register src, Register dst, +- Register len, Register tmp); +- +- void char_array_compress_v(Register src, Register dst, +- Register len, Register result, +- Register tmp); +- +- void encode_iso_array_v(Register src, Register dst, +- Register len, Register result, +- Register tmp); +- +- void count_positives_v(Register ary, Register len, +- Register result, Register tmp); +- +- void string_indexof_char_v(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- bool isL); +- +- void minmax_FD_v(VectorRegister dst, +- VectorRegister src1, 
VectorRegister src2, +- bool is_double, bool is_min); +- +- void reduce_minmax_FD_v(FloatRegister dst, +- FloatRegister src1, VectorRegister src2, +- VectorRegister tmp1, VectorRegister tmp2, +- bool is_double, bool is_min); +- + #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index cbfc0583883..845064d6cbc 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -90,10 +90,8 @@ define_pd_global(intx, InlineSmallCode, 1000); + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ +- product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ +- product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ +- product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ +- product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ +- "Use RVV instructions for left/right shift of BigInteger") ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseRVB, false, "Use RVB instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + + #endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 9d2cc4cf89f..8b8d126f6c9 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1086,7 +1086,7 @@ void MacroAssembler::popa() { + pop_reg(0xffffffe2, sp); + } + +-void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++void MacroAssembler::push_CPU_state() { + CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + push_reg(0xffffffe0, sp); +@@ -1096,28 +1096,10 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) + for (int i = 0; i < 32; i++) { + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } +- +- // vector registers +- if (save_vectors) { +- sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); +- vsetvli(t0, x0, Assembler::e64, Assembler::m8); +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { +- add(t0, sp, vector_size_in_bytes * i); +- vse64_v(as_VectorRegister(i), t0); +- } +- } + } + +-void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++void MacroAssembler::pop_CPU_state() { + CompressibleRegion cr(this); +- // vector registers +- if (restore_vectors) { +- vsetvli(t0, x0, Assembler::e64, Assembler::m8); +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { +- vle64_v(as_VectorRegister(i), sp); +- add(sp, sp, vector_size_in_bytes * 8); +- } +- } + + // float registers + for (int i = 0; i < 32; i++) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b2f0455a1f1..b43131514c1 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -501,8 +501,8 @@ class MacroAssembler: public Assembler { + + void pusha(); + void popa(); +- void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); +- void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ void push_CPU_state(); ++ void pop_CPU_state(); + + // if heap base register is used - reinit it with the correct value + 
void reinit_heapbase(); +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +index 23a75d20502..4c7fabd7240 100644 +--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp ++++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp +@@ -31,16 +31,9 @@ + // false => size gets scaled to BytesPerLong, ok. + static const bool init_array_count_is_in_bytes = false; + +- // Whether this platform implements the scalable vector feature +- static const bool implements_scalable_vector = true; +- +- static const bool supports_scalable_vector() { +- return UseRVV; +- } +- +- // riscv supports misaligned vectors store/load. ++ // riscv doesn't support misaligned vectors store/load on JDK11. + static constexpr bool misaligned_vectors_ok() { +- return true; ++ return false; + } + + // Whether code generation need accurate ConvI2L types. +@@ -53,9 +46,6 @@ + // the cpu only look at the lower 5/6 bits anyway? + static const bool need_masked_shift_count = false; + +- // No support for generic vector operands. +- static const bool supports_generic_vector_operands = false; +- + static constexpr bool isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + // Probably always true, even if a temp register is required. +@@ -127,31 +117,6 @@ + // the relevant 32 bits. + static const bool int_in_long = true; + +- // Does the CPU supports vector variable shift instructions? +- static constexpr bool supports_vector_variable_shifts(void) { +- return false; +- } +- +- // Does the CPU supports vector variable rotate instructions? +- static constexpr bool supports_vector_variable_rotates(void) { +- return false; +- } +- +- // Does the CPU supports vector constant rotate instructions? +- static constexpr bool supports_vector_constant_rotates(int shift) { +- return false; +- } +- +- // Does the CPU supports vector unsigned comparison instructions? +- static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { +- return false; +- } +- +- // Some microarchitectures have mask registers used on vectors +- static const bool has_predicated_vectors(void) { +- return false; +- } +- + // true means we have fast l2f convers + // false means that conversion is done by runtime call + static constexpr bool convL2FSupported(void) { +@@ -161,9 +126,4 @@ + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + +- // Returns pre-selection estimated size of a vector operation. 
+- static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { +- return 0; +- } +- + #endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +index f8116e9df8c..96cf1996a83 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.cpp ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -37,11 +37,6 @@ const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + +-const int ConcreteRegisterImpl::max_vpr = +- ConcreteRegisterImpl::max_fpr + +- VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; +- +- + const char* RegisterImpl::name() const { + static const char *const names[number_of_registers] = { + "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +index a9200cac647..d697751f55f 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.hpp ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -307,14 +307,12 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + // it's optoregs. + + number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + +- FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + +- VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; +- static const int max_vpr; + }; + + typedef AbstractRegSet RegSet; +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 588887e1d96..85593a942e9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -226,177 +226,6 @@ reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); + reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); + reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + +-// ---------------------------- +-// Vector Registers +-// ---------------------------- +- +-// For RVV vector registers, we simply extend vector register size to 4 +-// 'logical' slots. This is nominally 128 bits but it actually covers +-// all possible 'physical' RVV vector register lengths from 128 ~ 1024 +-// bits. The 'physical' RVV vector register length is detected during +-// startup, so the register allocator is able to identify the correct +-// number of bytes needed for an RVV spill/unspill. 
+- +-reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); +-reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); +-reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); +-reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); +- +-reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); +-reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); +-reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); +-reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); +- +-reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); +-reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); +-reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); +-reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); +- +-reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); +-reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); +-reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); +-reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); +- +-reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); +-reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); +-reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); +-reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); +- +-reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); +-reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); +-reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); +-reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); +- +-reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); +-reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); +-reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); +-reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); +- +-reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); +-reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); +-reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); +-reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); +- +-reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); +-reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); +-reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); +-reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); +- +-reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); +-reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); +-reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); +-reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); +- +-reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); +-reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); +-reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); +-reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); +- +-reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); +-reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); +-reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); +-reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); +- +-reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); +-reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); +-reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); +-reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); +- +-reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); +-reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); +-reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
+-reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); +- +-reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); +-reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); +-reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); +-reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); +- +-reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); +-reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); +-reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); +-reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); +- +-reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); +-reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); +-reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); +-reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); +- +-reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); +-reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); +-reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); +-reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); +- +-reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); +-reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); +-reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); +-reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); +- +-reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); +-reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); +-reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); +-reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); +- +-reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); +-reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); +-reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); +-reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); +- +-reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); +-reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); +-reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); +-reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); +- +-reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); +-reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); +-reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); +-reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); +- +-reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); +-reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); +-reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); +-reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); +- +-reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); +-reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); +-reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); +-reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); +- +-reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); +-reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); +-reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); +-reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); +- +-reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); +-reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); +-reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); +-reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); +- +-reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); +-reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); +-reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); +-reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); +- +-reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); +-reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); +-reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); +-reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); +- +-reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); +-reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); +-reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); +-reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); +- +-reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); +-reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); +-reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); +-reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); +- +-reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); +-reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); +-reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); +-reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); +- + // ---------------------------- + // Special Registers + // ---------------------------- +@@ -495,42 +324,7 @@ alloc_class chunk1( + F27, F27_H, + ); + +-alloc_class chunk2( +- V0, V0_H, V0_J, V0_K, +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K, +-); +- +-alloc_class chunk3(RFLAGS); ++alloc_class chunk2(RFLAGS); + + //----------Architecture Description Register Classes-------------------------- + // Several register classes are automatically defined based upon information in +@@ -826,41 +620,6 @@ reg_class double_reg( + F31, F31_H + ); + +-// Class for all RVV vector registers +-reg_class vectora_reg( +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, 
V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K +-); +- + // Class for 64 bit register f0 + reg_class f0_reg( + F0, F0_H +@@ -881,31 +640,6 @@ reg_class f3_reg( + F3, F3_H + ); + +-// class for vector register v1 +-reg_class v1_reg( +- V1, V1_H, V1_J, V1_K +-); +- +-// class for vector register v2 +-reg_class v2_reg( +- V2, V2_H, V2_J, V2_K +-); +- +-// class for vector register v3 +-reg_class v3_reg( +- V3, V3_H, V3_J, V3_K +-); +- +-// class for vector register v4 +-reg_class v4_reg( +- V4, V4_H, V4_J, V4_K +-); +- +-// class for vector register v5 +-reg_class v5_reg( +- V5, V5_H, V5_J, V5_K +-); +- + // class for condition codes + reg_class reg_flags(RFLAGS); + %} +@@ -1447,7 +1181,7 @@ const Pipeline * MachEpilogNode::pipeline() const { + + // Figure out which register class each belongs in: rc_int, rc_float or + // rc_stack. +-enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + + static enum RC rc_class(OptoReg::Name reg) { + +@@ -1468,13 +1202,7 @@ static enum RC rc_class(OptoReg::Name reg) { + return rc_float; + } + +- // we have 32 vector register * 4 halves +- int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; +- if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { +- return rc_vector; +- } +- +- // Between vector regs & stack is the flags regs. ++ // Between float regs & stack is the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; +@@ -1512,30 +1240,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + +- if (bottom_type()->isa_vect() != NULL) { +- uint ireg = ideal_reg(); +- if (ireg == Op_VecA && cbuf) { +- C2_MacroAssembler _masm(cbuf); +- Assembler::CompressibleRegion cr(&_masm); +- int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { +- // stack to stack +- __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, +- vector_reg_size_in_bytes); +- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { +- // vpr to stack +- __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); +- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { +- // stack to vpr +- __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); +- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { +- // vpr to vpr +- __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); +- } else { +- ShouldNotReachHere(); +- } +- } +- } else if (cbuf != NULL) { ++ if (cbuf != NULL) { + C2_MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { +@@ -1619,17 +1324,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + } else { + st->print("%s", Matcher::regName[dst_lo]); + } +- if (bottom_type()->isa_vect() != NULL) { +- int vsize = 0; +- if (ideal_reg() == Op_VecA) { +- vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; +- } else { +- ShouldNotReachHere(); +- } +- st->print("\t# vector spill size = %d", vsize); +- } else { +- st->print("\t# spill size = %d", is64 ? 64 : 32); +- } ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); + } + + return 0; +@@ -1796,14 +1491,6 @@ const bool Matcher::match_rule_supported(int opcode) { + } + break; + +- case Op_StrCompressedCopy: // fall through +- case Op_StrInflatedCopy: // fall through +- case Op_CountPositives: +- return UseRVV; +- +- case Op_EncodeISOArray: +- return UseRVV && SpecialEncodeISOArray; +- + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; +@@ -1821,37 +1508,15 @@ const bool Matcher::match_rule_supported(int opcode) { + } + + // Identify extra cases that we might want to provide match rules for vector nodes and +-// other intrinsics guarded with vector length (vlen) and element type (bt). +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { +- if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { +- return false; +- } +- +- return op_vec_supported(opcode); +-} +- +-const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + return false; + } + +-const RegMask* Matcher::predicate_reg_mask(void) { +- return NULL; +-} +- +-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { +- return NULL; +-} +- +-// Vector calling convention not yet implemented. +-const bool Matcher::supports_vector_calling_convention(void) { ++const bool Matcher::has_predicated_vectors(void) { + return false; + } + +-OptoRegPair Matcher::vector_return_value(uint ideal_reg) { +- Unimplemented(); +- return OptoRegPair(0, 0); +-} +- + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then +@@ -1877,11 +1542,6 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + + // Vector width in bytes. + const int Matcher::vector_width_in_bytes(BasicType bt) { +- if (UseRVV) { +- // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. +- // MaxVectorSize == VM_Version::_initial_vector_length +- return MaxVectorSize; +- } + return 0; + } + +@@ -1895,34 +1555,10 @@ const int Matcher::min_vector_size(const BasicType bt) { + + // Vector ideal reg. + const uint Matcher::vector_ideal_reg(int len) { +- assert(MaxVectorSize >= len, ""); +- if (UseRVV) { +- return Op_VecA; +- } +- + ShouldNotReachHere(); + return 0; + } + +-const int Matcher::scalable_vector_reg_size(const BasicType bt) { +- return Matcher::max_vector_size(bt); +-} +- +-MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { +- ShouldNotReachHere(); // generic vector operands not supported +- return NULL; +-} +- +-bool Matcher::is_reg2reg_move(MachNode* m) { +- ShouldNotReachHere(); // generic vector operands not supported +- return false; +-} +- +-bool Matcher::is_generic_vector(MachOper* opnd) { +- ShouldNotReachHere(); // generic vector operands not supported +- return false; +-} +- + // Return whether or not this register is ever used as an argument. + // This function is used on startup to build the trampoline stubs in + // generateOptoStub. Registers not mentioned will be killed by the VM +@@ -3384,67 +3020,6 @@ operand fRegD() + interface(REG_INTER); + %} + +-// Generic vector class. This will be used for +-// all vector operands. 
+-operand vReg() +-%{ +- constraint(ALLOC_IN_RC(vectora_reg)); +- match(VecA); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V1() +-%{ +- constraint(ALLOC_IN_RC(v1_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V2() +-%{ +- constraint(ALLOC_IN_RC(v2_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V3() +-%{ +- constraint(ALLOC_IN_RC(v3_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V4() +-%{ +- constraint(ALLOC_IN_RC(v4_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V5() +-%{ +- constraint(ALLOC_IN_RC(v5_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- + // Java Thread Register + operand javaThread_RegP(iRegP reg) + %{ +@@ -7939,17 +7514,6 @@ instruct castDD(fRegD dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castVV(vReg dst) +-%{ +- match(Set dst (CastVV dst)); +- +- size(0); +- format %{ "# castVV of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + // ============================================================================ + // Convert Instructions + +@@ -10076,7 +9640,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 su + instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10094,7 +9658,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R + instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10111,7 +9675,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R + instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10129,7 +9693,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ predicate(((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10275,7 +9839,7 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -10294,7 +9858,7 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -10310,7 +9874,6 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + // clearing of an array + instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + %{ +- predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + +@@ -10330,8 +9893,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + + instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) + %{ +- predicate(!UseRVV && (uint64_t)n->in(2)->get_long() +- < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + +@@ -10348,7 +9910,7 @@ instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg + instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + +@@ -10364,7 +9926,7 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + +@@ -10381,7 +9943,7 @@ instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + +@@ -10398,7 +9960,7 @@ instruct array_equalsC(iRegP_R11 ary1, 
iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + +diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +deleted file mode 100644 +index 3828e096b21..00000000000 +--- a/src/hotspot/cpu/riscv/riscv_v.ad ++++ /dev/null +@@ -1,2065 +0,0 @@ +-// +-// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +-// Copyright (c) 2020, Arm Limited. All rights reserved. +-// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +-// +-// This code is free software; you can redistribute it and/or modify it +-// under the terms of the GNU General Public License version 2 only, as +-// published by the Free Software Foundation. +-// +-// This code is distributed in the hope that it will be useful, but WITHOUT +-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-// version 2 for more details (a copy is included in the LICENSE file that +-// accompanied this code). +-// +-// You should have received a copy of the GNU General Public License version +-// 2 along with this work; if not, write to the Free Software Foundation, +-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +-// +-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +-// or visit www.oracle.com if you need additional information or have any +-// questions. 
+-// +-// +- +-// RISCV Vector Extension Architecture Description File +- +-opclass vmemA(indirect); +- +-source_hpp %{ +- bool op_vec_supported(int opcode); +-%} +- +-source %{ +- +- static void loadStore(C2_MacroAssembler masm, bool is_store, +- VectorRegister reg, BasicType bt, Register base) { +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- masm.vsetvli(t0, x0, sew); +- if (is_store) { +- masm.vsex_v(reg, base, sew); +- } else { +- masm.vlex_v(reg, base, sew); +- } +- } +- +- bool op_vec_supported(int opcode) { +- switch (opcode) { +- // No multiply reduction instructions +- case Op_MulReductionVD: +- case Op_MulReductionVF: +- case Op_MulReductionVI: +- case Op_MulReductionVL: +- // Others +- case Op_Extract: +- case Op_ExtractB: +- case Op_ExtractC: +- case Op_ExtractD: +- case Op_ExtractF: +- case Op_ExtractI: +- case Op_ExtractL: +- case Op_ExtractS: +- case Op_ExtractUB: +- // Vector API specific +- case Op_AndReductionV: +- case Op_OrReductionV: +- case Op_XorReductionV: +- case Op_LoadVectorGather: +- case Op_StoreVectorScatter: +- case Op_VectorBlend: +- case Op_VectorCast: +- case Op_VectorCastB2X: +- case Op_VectorCastD2X: +- case Op_VectorCastF2X: +- case Op_VectorCastI2X: +- case Op_VectorCastL2X: +- case Op_VectorCastS2X: +- case Op_VectorInsert: +- case Op_VectorLoadConst: +- case Op_VectorLoadMask: +- case Op_VectorLoadShuffle: +- case Op_VectorMaskCmp: +- case Op_VectorRearrange: +- case Op_VectorReinterpret: +- case Op_VectorStoreMask: +- case Op_VectorTest: +- return false; +- default: +- return UseRVV; +- } +- } +- +-%} +- +-definitions %{ +- int_def VEC_COST (200, 200); +-%} +- +-// All VEC instructions +- +-// vector load/store +-instruct loadV(vReg dst, vmemA mem) %{ +- match(Set dst (LoadVector mem)); +- ins_cost(VEC_COST); +- format %{ "vle $dst, $mem\t#@loadV" %} +- ins_encode %{ +- VectorRegister dst_reg = as_VectorRegister($dst$$reg); +- loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, +- Matcher::vector_element_basic_type(this), as_Register($mem$$base)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct storeV(vReg src, vmemA mem) %{ +- match(Set mem (StoreVector mem src)); +- ins_cost(VEC_COST); +- format %{ "vse $src, $mem\t#@storeV" %} +- ins_encode %{ +- VectorRegister src_reg = as_VectorRegister($src$$reg); +- loadStore(C2_MacroAssembler(&cbuf), true, src_reg, +- Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector abs +- +-instruct vabsB(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVB src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsS(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVS src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsI(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVI src)); +- 
ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsL(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVL src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsF(vReg dst, vReg src) %{ +- match(Set dst (AbsVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsD(vReg dst, vReg src) %{ +- match(Set dst (AbsVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector add +- +-instruct vaddB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst 
(AddVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector and +- +-instruct vand(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AndV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vand_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector or +- +-instruct vor(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (OrV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vor_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector xor +- +-instruct vxor(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (XorV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vxor_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float div +- +-instruct vdivF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (DivVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfdiv_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vdivD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (DivVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfdiv_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer max/min +- +-instruct vmax(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && +- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); +- match(Set dst (MaxV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} +- ins_encode %{ +- BasicType bt = Matcher::vector_element_basic_type(this); +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- __ vsetvli(t0, x0, sew); +- __ vmax_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmin(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && +- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); +- match(Set dst (MinV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} +- ins_encode %{ +- BasicType bt = Matcher::vector_element_basic_type(this); +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- __ vsetvli(t0, x0, sew); +- __ vmin_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float-point max/min +- +-instruct 
vmaxF(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MaxV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- false /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MaxV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- true /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vminF(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MinV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vminF $dst, $src1, $src2\t#@vminF" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- false /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vminD(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MinV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vminD $dst, $src1, $src2\t#@vminD" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- true /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fmla +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fmls +- +-// dst_src1 = dst_src1 + -src2 * src3 +-// dst_src1 = dst_src1 + src2 * -src3 +-instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); +- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + -src2 * src3 +-// dst_src1 = dst_src1 + 
src2 * -src3 +-instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); +- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fnmla +- +-// dst_src1 = -dst_src1 + -src2 * src3 +-// dst_src1 = -dst_src1 + src2 * -src3 +-instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = -dst_src1 + -src2 * src3 +-// dst_src1 = -dst_src1 + src2 * -src3 +-instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fnmls +- +-// dst_src1 = -dst_src1 + src2 * src3 +-instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = -dst_src1 + src2 * src3 +-instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mla +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ 
vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mls +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mul +- +-instruct vmulB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- 
as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fneg +- +-instruct vnegF(vReg dst, vReg src) %{ +- match(Set dst (NegVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vnegD(vReg dst, vReg src) %{ +- match(Set dst (NegVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// popcount vector +- +-instruct vpopcountI(iRegINoSp dst, vReg src) %{ +- match(Set dst (PopCountVI src)); +- format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector add reduction +- +-instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, 
$src1\t#@reduce_addS\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (AddReductionVL src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ +- match(Set src1_dst (AddReductionVF src1_dst src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" +- "vfredosum.vs $tmp, $src2, $tmp\n\t" +- "vfmv.f.s $src1_dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); +- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ +- match(Set src1_dst (AddReductionVD src1_dst src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" +- "vfredosum.vs $tmp, $src2, $tmp\n\t" +- "vfmv.f.s $src1_dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); +- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer max reduction +-instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), 
as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer min reduction +-instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ 
vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float max reduction +- +-instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- false /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- true /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float min reduction +- +-instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- false /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- true /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector Math.rint, floor, ceil +- +-instruct vroundD(vReg dst, vReg src, immI rmode) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (RoundDoubleModeV src rmode)); +- format %{ "vroundD $dst, $src, $rmode" %} +- ins_encode %{ +- switch ($rmode$$constant) { +- case RoundDoubleModeNode::rmode_rint: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rne); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- case RoundDoubleModeNode::rmode_floor: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- case RoundDoubleModeNode::rmode_ceil: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rup); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- default: +- ShouldNotReachHere(); +- break; +- } +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector replicate +- +-instruct replicateB(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateB src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateS(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateS src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateI(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateI src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateL(vReg dst, iRegL src) %{ +- match(Set dst (ReplicateL src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateB_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateB con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateS_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateS con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateI_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateI con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i 
$dst, $con\t#@replicateI_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateL_imm5(vReg dst, immL5 con) %{ +- match(Set dst (ReplicateL con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateF(vReg dst, fRegF src) %{ +- match(Set dst (ReplicateF src)); +- ins_cost(VEC_COST); +- format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateD(vReg dst, fRegD src) %{ +- match(Set dst (ReplicateD src)); +- ins_cost(VEC_COST); +- format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector shift +- +-instruct vasrB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVB src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" +- "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- BitsPerByte - 1, Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" +- "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- BitsPerShort - 1, Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} 
+- ins_pipe(pipe_slow); +-%} +- +-instruct vlslB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVB src shift)); +- ins_cost(VEC_COST); +- effect( TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVB src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, 
v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +- +-instruct vlsrI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +- +-instruct vlsrL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVB src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerByte) con = BitsPerByte - 1; +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVS src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerShort) con = BitsPerShort - 1; +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVI src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (RShiftVL src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
+- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVB src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerByte) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVS src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerShort) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVI src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (URShiftVL src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVB src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con >= BitsPerByte) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVS src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} +- ins_encode %{ +- 
uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con >= BitsPerShort) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVI src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (LShiftVL src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || +- n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector sqrt +- +-instruct vsqrtF(vReg dst, vReg src) %{ +- match(Set dst (SqrtVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsqrtD(vReg dst, vReg src) %{ +- match(Set dst (SqrtVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsqrt_v(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector sub +- +-instruct vsubB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, +- iRegI_R10 result, vReg_V1 v1, +- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result (StrEquals (Binary str1 str2) cnt)); +- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. +- __ string_equals_v($str1$$Register, $str2$$Register, +- $result$$Register, $cnt$$Register, 1); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, +- iRegI_R10 result, vReg_V1 v1, +- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result (StrEquals (Binary str1 str2) cnt)); +- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+- __ string_equals_v($str1$$Register, $str2$$Register, +- $result$$Register, $cnt$$Register, 2); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result (AryEq ary1 ary2)); +- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} +- ins_encode %{ +- __ arrays_equals_v($ary1$$Register, $ary2$$Register, +- $result$$Register, $tmp$$Register, 1); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result (AryEq ary1 ary2)); +- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} +- ins_encode %{ +- __ arrays_equals_v($ary1$$Register, $ary2$$Register, +- $result$$Register, $tmp$$Register, 2); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::UU); +- %} +- ins_pipe(pipe_class_memory); +-%} +-instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::LL); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::UL); +- %} +- ins_pipe(pipe_class_memory); +-%} +-instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::LU); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-// fast byte[] to char[] inflation +-instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set dummy (StrInflatedCopy src (Binary dst len))); +- effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); +- +- format %{ "String Inflate $src,$dst" %} +- ins_encode %{ +- __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-// encode char[] to byte[] in ISO_8859_1 +-instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (EncodeISOArray src (Binary dst len))); +- 
effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, +- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); +- +- format %{ "Encode array $src,$dst,$len -> $result" %} +- ins_encode %{ +- __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, +- $result$$Register, $tmp$$Register); +- %} +- ins_pipe( pipe_class_memory ); +-%} +- +-// fast char[] to byte[] compression +-instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (StrCompressedCopy src (Binary dst len))); +- effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, +- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); +- +- format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} +- ins_encode %{ +- __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, +- $result$$Register, $tmp$$Register); +- %} +- ins_pipe( pipe_slow ); +-%} +- +-instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (CountPositives ary len)); +- effect(USE_KILL ary, USE_KILL len, TEMP tmp); +- +- format %{ "count positives byte[] $ary, $len -> $result" %} +- ins_encode %{ +- __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +-%{ +- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, +- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); +- +- format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} +- +- ins_encode %{ +- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- false /* isL */); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +-%{ +- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, +- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); +- +- format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} +- +- ins_encode %{ +- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- true /* isL */); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +- +-// clearing of an array +-instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, +- vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) +-%{ +- predicate(UseRVV); +- match(Set dummy (ClearArray cnt base)); +- effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); +- +- format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} +- +- ins_encode %{ +- __ clear_array_v($base$$Register, $cnt$$Register); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index f85d4b25a76..4daed17df10 100644 +--- 
a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -80,9 +80,8 @@ class SimpleRuntimeFrame { + }; + + class RegisterSaver { +- const bool _save_vectors; + public: +- RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); +@@ -91,11 +90,7 @@ class RegisterSaver { + // Used by deoptimization when it is managing result register + // values on its own + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) +- // |---v0---|<---SP +- // |---v1---|save vectors only in generate_handler_blob +- // |-- .. --| +- // |---v31--|----- +- // |---f0---| ++ // |---f0---|<---SP + // |---f1---| + // | .. | + // |---f31--| +@@ -106,16 +101,8 @@ class RegisterSaver { + // |---x31--| + // |---fp---| + // |---ra---| +- int v0_offset_in_bytes(void) { return 0; } + int f0_offset_in_bytes(void) { +- int f0_offset = 0; +-#ifdef COMPILER2 +- if (_save_vectors) { +- f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * +- BytesPerInt; +- } +-#endif +- return f0_offset; ++ return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + +@@ -142,15 +129,6 @@ class RegisterSaver { + }; + + OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { +- int vector_size_in_bytes = 0; +- int vector_size_in_slots = 0; +-#ifdef COMPILER2 +- if (_save_vectors) { +- vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); +- vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); +- } +-#endif +- + assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words +@@ -161,9 +139,9 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + +- // Save Integer, Float and Vector registers. ++ // Save Integer and Float registers. + __ enter(); +- __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ __ push_CPU_state(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. 
This +@@ -176,13 +154,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + int sp_offset_in_slots = 0; + int step_in_slots = 0; +- if (_save_vectors) { +- step_in_slots = vector_size_in_slots; +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { +- VectorRegister r = as_VectorRegister(i); +- oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); +- } +- } + + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { +@@ -207,18 +178,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + assert_cond(masm != NULL); +-#ifdef COMPILER2 +- __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); +-#else +- __ pop_CPU_state(_save_vectors); +-#endif ++ __ pop_CPU_state(); + __ leave(); + } + + // Is vector's size (in bytes) bigger than a size saved by default? +-// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. + bool SharedRuntime::is_wide_vector(int size) { +- return UseRVV; ++ return false; + } + + // The java_calling_convention describes stack locations as ideal slots on +@@ -674,13 +640,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + } + +-int SharedRuntime::vector_calling_convention(VMRegPair *regs, +- uint num_bits, +- uint total_args_passed) { +- Unimplemented(); +- return 0; +-} +- + int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, +@@ -1891,7 +1850,7 @@ void SharedRuntime::generate_deopt_blob() { + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(masm != NULL && oop_maps != NULL); +- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ RegisterSaver reg_saver; + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return +@@ -2423,7 +2382,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + address call_pc = NULL; + int frame_size_in_words = -1; + bool cause_return = (poll_type == POLL_AT_RETURN); +- RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ RegisterSaver reg_saver; + + // Save Integer and Float registers. 
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); +@@ -2542,7 +2501,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha + assert_cond(masm != NULL); + + int frame_size_in_words = -1; +- RegisterSaver reg_saver(false /* save_vectors */); ++ RegisterSaver reg_saver; + + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index b05edf7172c..39416441bdf 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2843,111 +2843,6 @@ class StubGenerator: public StubCodeGenerator { + + return entry; + } +- +- // Arguments: +- // +- // Input: +- // c_rarg0 - newArr address +- // c_rarg1 - oldArr address +- // c_rarg2 - newIdx +- // c_rarg3 - shiftCount +- // c_rarg4 - numIter +- // +- address generate_bigIntegerLeftShift() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); +- address entry = __ pc(); +- +- Label loop, exit; +- +- Register newArr = c_rarg0; +- Register oldArr = c_rarg1; +- Register newIdx = c_rarg2; +- Register shiftCount = c_rarg3; +- Register numIter = c_rarg4; +- +- Register shiftRevCount = c_rarg5; +- Register oldArrNext = t1; +- +- __ beqz(numIter, exit); +- __ shadd(newArr, newIdx, newArr, t0, 2); +- +- __ li(shiftRevCount, 32); +- __ sub(shiftRevCount, shiftRevCount, shiftCount); +- +- __ bind(loop); +- __ addi(oldArrNext, oldArr, 4); +- __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); +- __ vle32_v(v0, oldArr); +- __ vle32_v(v4, oldArrNext); +- __ vsll_vx(v0, v0, shiftCount); +- __ vsrl_vx(v4, v4, shiftRevCount); +- __ vor_vv(v0, v0, v4); +- __ vse32_v(v0, newArr); +- __ sub(numIter, numIter, t0); +- __ shadd(oldArr, t0, oldArr, t1, 2); +- __ shadd(newArr, t0, newArr, t1, 2); +- __ bnez(numIter, loop); +- +- __ bind(exit); +- __ ret(); +- +- return entry; +- } +- +- // Arguments: +- // +- // Input: +- // c_rarg0 - newArr address +- // c_rarg1 - oldArr address +- // c_rarg2 - newIdx +- // c_rarg3 - shiftCount +- // c_rarg4 - numIter +- // +- address generate_bigIntegerRightShift() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); +- address entry = __ pc(); +- +- Label loop, exit; +- +- Register newArr = c_rarg0; +- Register oldArr = c_rarg1; +- Register newIdx = c_rarg2; +- Register shiftCount = c_rarg3; +- Register numIter = c_rarg4; +- Register idx = numIter; +- +- Register shiftRevCount = c_rarg5; +- Register oldArrNext = c_rarg6; +- Register newArrCur = t0; +- Register oldArrCur = t1; +- +- __ beqz(idx, exit); +- __ shadd(newArr, newIdx, newArr, t0, 2); +- +- __ li(shiftRevCount, 32); +- __ sub(shiftRevCount, shiftRevCount, shiftCount); +- +- __ bind(loop); +- __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); +- __ sub(idx, idx, t0); +- __ shadd(oldArrNext, idx, oldArr, t1, 2); +- __ shadd(newArrCur, idx, newArr, t1, 2); +- __ addi(oldArrCur, oldArrNext, 4); +- __ vle32_v(v0, oldArrCur); +- __ vle32_v(v4, oldArrNext); +- __ vsrl_vx(v0, v0, shiftCount); +- __ vsll_vx(v4, v4, shiftRevCount); +- __ vor_vv(v0, v0, v4); +- __ vse32_v(v0, newArrCur); +- __ bnez(idx, loop); +- +- __ bind(exit); +- __ ret(); +- +- return entry; +- } + #endif + + #ifdef COMPILER2 +@@ -3813,11 +3708,6 @@ class StubGenerator: public StubCodeGenerator { + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + 
StubRoutines::_montgomerySquare = g.generate_square(); + } +- +- if (UseRVVForBigIntegerShiftIntrinsics) { +- StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); +- StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); +- } + #endif + + generate_compare_long_strings(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 768c7633ca6..2c15a834542 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -167,10 +167,6 @@ void VM_Version::c2_initialize() { + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + +- if (!UseRVV) { +- FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); +- } +- + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index aa7222dc64a..1f6eff96cba 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -45,16 +45,8 @@ void VMRegImpl::set_regName() { + freg = freg->successor(); + } + +- VectorRegister vreg = ::as_VectorRegister(0); +- for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { +- for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { +- regName[i++] = reg->name(); +- } +- vreg = vreg->successor(); +- } +- + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { +- regName[i] = "NON-GPR-FPR-VPR"; ++ regName[i] = "NON-GPR-FPR"; + } + } + +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +index 9e611b1f671..6f613a8f11a 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -34,10 +34,6 @@ inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; + } + +-inline bool is_VectorRegister() { +- return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; +-} +- + inline Register as_Register() { + assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +@@ -49,20 +45,9 @@ inline FloatRegister as_FloatRegister() { + FloatRegisterImpl::max_slots_per_register); + } + +-inline VectorRegister as_VectorRegister() { +- assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); +- return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / +- VectorRegisterImpl::max_slots_per_register); +-} +- + inline bool is_concrete() { + assert(is_reg(), "must be"); +- if (is_VectorRegister()) { +- int base = value() - ConcreteRegisterImpl::max_fpr; +- return (base % VectorRegisterImpl::max_slots_per_register) == 0; +- } else { +- return is_even(value()); +- } ++ return is_even(value()); + } + + #endif // CPU_RISCV_VMREG_RISCV_HPP + +From b2011bad9b7404c1f6d0c1aa3176569d7f07d7a9 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 27 Mar 2023 16:05:55 +0800 +Subject: [PATCH 004/140] Revert: JDK-8253180: ZGC: Implementation of JEP 376: + ZGC: Concurrent Thread-Stack Processing JDK-8220051: Remove global safepoint + code + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 14 ------ + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 14 +++--- + .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 ------------------ + src/hotspot/cpu/riscv/frame_riscv.cpp | 9 +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 +------- + 
.../cpu/riscv/macroAssembler_riscv.cpp | 48 +++++++++++-------- + .../cpu/riscv/macroAssembler_riscv.hpp | 5 +- + src/hotspot/cpu/riscv/riscv.ad | 14 ++---- + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 - + 9 files changed, 45 insertions(+), 127 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index dcd0472c540..af7bd067f33 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -39,20 +39,6 @@ + + #define __ ce->masm()-> + +-void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { +- __ bind(_entry); +- InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); +- __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); +- __ la(t0, safepoint_pc.target()); +- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); +- +- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, +- "polling page return stub not created yet"); +- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +- +- __ far_jump(RuntimeAddress(stub)); +-} +- + void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index bba3bd4709c..0e383a3c139 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -424,7 +424,7 @@ int LIR_Assembler::emit_deopt_handler() { + return offset; + } + +-void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { ++void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code +@@ -434,18 +434,20 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + __ reserved_stack_check(); + } + +- code_stub->set_safepoint_offset(__ offset()); +- __ relocate(relocInfo::poll_return_type); +- __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ address polling_page(os::get_polling_page()); ++ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); + __ ret(); + } + + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); +- __ get_polling_page(t0, relocInfo::poll_type); ++ assert(os::is_poll_address(polling_page), "should be"); ++ int32_t offset = 0; ++ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map +- __ read_polling_page(t0, 0, relocInfo::poll_type); ++ __ read_polling_page(t0, offset, relocInfo::poll_type); + return __ offset(); + } + +diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +deleted file mode 100644 +index a90d9fdc160..00000000000 +--- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp ++++ /dev/null +@@ -1,47 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/macroAssembler.hpp" +-#include "opto/compile.hpp" +-#include "opto/node.hpp" +-#include "opto/output.hpp" +-#include "runtime/sharedRuntime.hpp" +- +-#define __ masm. +-void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { +- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, +- "polling page return stub not created yet"); +- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +- RuntimeAddress callback_addr(stub); +- +- __ bind(entry->_stub_label); +- InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); +- masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); +- __ la(t0, safepoint_pc.target()); +- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); +- __ far_jump(callback_addr); +-} +-#undef __ +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 6e38960598a..41e52a4d491 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -39,7 +39,6 @@ + #include "runtime/monitorChunk.hpp" + #include "runtime/os.inline.hpp" + #include "runtime/signature.hpp" +-#include "runtime/stackWatermarkSet.hpp" + #include "runtime/stubCodeGenerator.hpp" + #include "runtime/stubRoutines.hpp" + #include "vmreg_riscv.inline.hpp" +@@ -509,13 +508,7 @@ frame frame::sender_raw(RegisterMap* map) const { + } + + frame frame::sender(RegisterMap* map) const { +- frame result = sender_raw(map); +- +- if (map->process_frames()) { +- StackWatermarkSet::on_iteration(map->thread(), result); +- } +- +- return result; ++ return sender_raw(map); + } + + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index d12dcb2af19..9090ad0c058 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -519,7 +519,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); +- ld(t1, Address(xthread, JavaThread::polling_word_offset())); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); + andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } +@@ -591,23 +591,6 @@ void InterpreterMacroAssembler::remove_activation( + // result check if synchronized method + 
Label unlocked, unlock, no_unlock; + +- // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, +- // that would normally not be safe to use. Such bad returns into unsafe territory of +- // the stack, will call InterpreterRuntime::at_unwind. +- Label slow_path; +- Label fast_path; +- safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); +- j(fast_path); +- +- bind(slow_path); +- push(state); +- set_last_Java_frame(esp, fp, (address)pc(), t0); +- super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); +- reset_last_Java_frame(true); +- pop(state); +- +- bind(fast_path); +- + // get the value of _do_not_unlock_if_synchronized into x13 + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 8b8d126f6c9..4b6136ae36b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2122,15 +2122,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, + } + + void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { +- ld(t0, Address(xthread, JavaThread::polling_word_offset())); +- if (acquire) { +- membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +- } +- if (at_return) { +- bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); + } else { +- andi(t0, t0, SafepointMechanism::poll_bit()); +- bnez(t0, slow_path, true /* is_far */); ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); + } + } + +@@ -2752,22 +2753,29 @@ void MacroAssembler::reserved_stack_check() { + } + + // Move the address of the polling page into dest. +-void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { +- ld(dest, Address(xthread, JavaThread::polling_page_offset())); ++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } + } + + // Read the polling page. The address of the polling page must + // already be in r. +-address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { +- address mark; +- { +- InstructionMark im(this); +- code_section()->relocate(inst_mark(), rtype); +- lwu(zr, Address(r, offset)); +- mark = inst_mark(); +- } +- verify_cross_modify_fence_not_required(); +- return mark; ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); ++} ++ ++// Read the polling page. The address of the polling page must ++// already be in r. 
++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { ++ code_section()->relocate(pc(), rtype); ++ lwu(zr, Address(dest, offset)); + } + + void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b43131514c1..041c696add6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -625,8 +625,9 @@ class MacroAssembler: public Assembler { + + void reserved_stack_check(); + +- void get_polling_page(Register dest, relocInfo::relocType rtype); +- address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + + address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 85593a942e9..996fa1fb68f 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1132,9 +1132,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + } + + if (do_polling() && C->is_method_compilation()) { +- st->print("# test polling word\n\t"); +- st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); +- st->print("bgtu sp, t0, #slow_path"); ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); + } + } + #endif +@@ -1153,13 +1153,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + } + + if (do_polling() && C->is_method_compilation()) { +- Label dummy_label; +- Label* code_stub = &dummy_label; +- if (!C->output()->in_scratch_emit_size()) { +- code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); +- } +- __ relocate(relocInfo::poll_return_type); +- __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); + } + } + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +index 8e35530359a..7586af01d99 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -48,8 +48,6 @@ class VM_Version : public Abstract_VM_Version { + // Initialization + static void initialize(); + +- constexpr static bool supports_stack_watermark_barrier() { return true; } +- + enum Feature_Flag { + #define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ + +From a032c615883fe2bd557baf40f1439cbae55be206 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 1 May 2023 15:42:09 +0800 +Subject: [PATCH 005/140] Revert JDK-8221554: aarch64 cross-modifying code + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 22 ------------------- + .../cpu/riscv/macroAssembler_riscv.hpp | 2 -- + 2 files changed, 24 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 4b6136ae36b..269d76ba69e 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2716,7 
+2716,6 @@ void MacroAssembler::build_frame(int framesize) { + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } +- verify_cross_modify_fence_not_required(); + } + + void MacroAssembler::remove_frame(int framesize) { +@@ -3935,26 +3934,5 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe + + void MacroAssembler::safepoint_ifence() { + ifence(); +-#ifndef PRODUCT +- if (VerifyCrossModifyFence) { +- // Clear the thread state. +- sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); +- } +-#endif + } + +-#ifndef PRODUCT +-void MacroAssembler::verify_cross_modify_fence_not_required() { +- if (VerifyCrossModifyFence) { +- // Check if thread needs a cross modify fence. +- lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); +- Label fence_not_required; +- beqz(t0, fence_not_required); +- // If it does then fail. +- la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); +- mv(c_rarg0, xthread); +- jalr(t0); +- bind(fence_not_required); +- } +-} +-#endif +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 041c696add6..b59bdadb8bf 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -821,8 +821,6 @@ class MacroAssembler: public Assembler { + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + +- // Check the current thread doesn't need a cross modify fence. 
+- void verify_cross_modify_fence_not_required() PRODUCT_RETURN; + }; + + #ifdef ASSERT + +From fd89cf689015649a5cb850e1e24dcbb7bb59735a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:11:30 +0800 +Subject: [PATCH 006/140] Revert JDK-8242263: Diagnose synchronization on + primitive wrappers + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 7 ------- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 7 ------- + src/hotspot/cpu/riscv/riscv.ad | 7 ------- + 3 files changed, 21 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 6f656c8c533..348546a9ea0 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -64,13 +64,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + + null_check_offset = offset(); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(hdr, obj); +- lwu(hdr, Address(hdr, Klass::access_flags_offset())); +- andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); +- bnez(t0, slow_case, true /* is_far */); +- } +- + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 9090ad0c058..8adc7b1320d 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -782,13 +782,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(tmp, obj_reg); +- lwu(tmp, Address(tmp, Klass::access_flags_offset())); +- andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); +- bnez(tmp, slow_case); +- } +- + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 996fa1fb68f..2eefc71dde0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1965,13 +1965,6 @@ encode %{ + // Load markWord from object into displaced_header. 
+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- __ load_klass(flag, oop); +- __ lwu(flag, Address(flag, Klass::access_flags_offset())); +- __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); +- __ bnez(flag, cont, true /* is_far */); +- } +- + // Check for existing monitor + __ andi(t0, disp_hdr, markWord::monitor_value); + __ bnez(t0, object_has_monitor); + +From feea78c5a227c0a57e57d6d1d544a14682310053 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:24:12 +0800 +Subject: [PATCH 007/140] Revert JDK-8278104: C1 should support the compiler + directive 'BreakAtExecute' + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 348546a9ea0..e5ed25616d6 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -311,7 +311,7 @@ void C1_MacroAssembler::remove_frame(int framesize) { + } + + +-void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + +From 651009a5783f6f5150b3e75a50069dc841622d33 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:57:14 +0800 +Subject: [PATCH 008/140] Revert: JDK-8234562: Move + OrderAccess::release_store*/load_acquire to Atomic JDK-8234736: Harmonize + parameter order in Atomic - store JDK-8234737: Harmonize parameter order in + Atomic - add JDK-8234740: Harmonize parameter order in Atomic - cmpxchg + JDK-8234739: Harmonize parameter order in Atomic - xchg JDK-8236778: Add + Atomic::fetch_and_add + +--- + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 51 +++++++------------ + .../linux_riscv/orderAccess_linux_riscv.hpp | 31 +++++++---- + 2 files changed, 39 insertions(+), 43 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +index 761da5d743e..9b8b1a31774 100644 +--- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +@@ -33,25 +33,31 @@ + // Note that memory_order_conservative requires a full barrier after atomic stores. 
+ // See https://patchwork.kernel.org/patch/3575821/
+
++#define FULL_MEM_BARRIER __sync_synchronize()
++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
++
+ template<size_t byte_size>
+-struct Atomic::PlatformAdd {
+- template<typename D, typename I>
+- D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
++struct Atomic::PlatformAdd
++ : Atomic::FetchAndAdd<PlatformAdd<byte_size> >
++{
++ template<typename I, typename D>
++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
+ }
+
+- template<typename D, typename I>
+- D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
+- return add_and_fetch(dest, add_value, order) - add_value;
++ template<typename I, typename D>
++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const {
++ return add_and_fetch(add_value, dest, order) - add_value;
+ }
+ };
+
+ template<size_t byte_size>
+ template<typename T>
+-inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
+- T exchange_value,
++inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
++ T volatile* dest,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(byte_size == sizeof(T));
+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
+@@ -62,9 +68,9 @@ inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
+ // __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
+ template<size_t byte_size>
+ template<typename T>
+-inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attribute__((unused)),
++inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
++ T volatile* dest __attribute__((unused)),
+ T compare_value,
+- T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(byte_size == sizeof(T));
+ T value = compare_value;
+@@ -83,9 +89,9 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attri
+
+ template<>
+ template<typename T>
+-inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)),
++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
++ T volatile* dest __attribute__((unused)),
+ T compare_value,
+- T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ if (order != memory_order_relaxed) {
+@@ -110,25 +116,4 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((
+ return rv;
+ }
+
+-template<size_t byte_size>
+-struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+-{
+- template <typename T>
+- T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
+-};
+-
+-template<size_t byte_size>
+-struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X>
+-{
+- template <typename T>
+- void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
+-};
+-
+-template<size_t byte_size>
+-struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
+-{
+- template <typename T>
+- void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
+-};
+-
+ #endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
+index 1c33dc1e87f..5b5d35553f7 100644
+--- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
+@@ -37,10 +37,6 @@ inline void OrderAccess::storestore() { release(); }
+ inline void OrderAccess::loadstore() { acquire(); }
+ inline void OrderAccess::storeload() { fence(); }
+
+-#define FULL_MEM_BARRIER __sync_synchronize()
+-#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
+-#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+-
+ inline void OrderAccess::acquire() {
+ READ_MEM_BARRIER;
+ }
+@@ -53,11 +49,26 @@ inline void OrderAccess::fence() {
+ FULL_MEM_BARRIER;
+ }
+
+-inline void OrderAccess::cross_modify_fence_impl() {
+- asm volatile("fence.i" : : : "memory");
+- if (UseConservativeFence) {
+- asm volatile("fence ir, ir" : : : "memory");
+- }
+-}
++
++template<size_t byte_size>
++struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
++{
++ template <typename T>
++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
++};
++
++template<size_t byte_size>
++struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
++{
++ template <typename T>
++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
++};
++
++template<size_t byte_size>
++struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
++{
++ template <typename T>
++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); }
++};
+
+ #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+
+From b078a2ec01598fbcd99aea61af15d44f9c884aaa Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Tue, 25 Apr 2023 21:07:42 +0800
+Subject: [PATCH 009/140] Revert JDK-8229258: Rework markOop and markOopDesc
+ into a simpler mark word value carrier
+
+---
+ .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 ++--
+ .../shenandoahBarrierSetAssembler_riscv.cpp | 4 ++--
+ src/hotspot/cpu/riscv/riscv.ad | 22 +++++++++----------
+ src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +-
+ 4 files changed, 16 insertions(+), 16 deletions(-)
+
+diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+index e5ed25616d6..2d52343587e 100644
+--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+@@ -67,7 +67,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
+ // Load object header
+ ld(hdr, Address(obj, hdr_offset));
+ // and mark it as unlocked
+- ori(hdr, hdr, markWord::unlocked_value);
++ ori(hdr, hdr, markOopDesc::unlocked_value);
+ // save unlocked object header into the displaced header location on the stack
+ sd(hdr, Address(disp_hdr, 0));
+ // test if object header is still the same (i.e.
unlocked), and if so, store the +@@ -141,7 +141,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i + void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); + // This assumes that all prototype bits fitr in an int32_t +- mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +index d0ac6e52436..84e1205bc25 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -216,9 +216,9 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 +- __ andi(t2, tmp, markWord::lock_mask_in_place); ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); + __ bnez(t2, done); +- __ ori(tmp, tmp, markWord::marked_value); ++ __ ori(tmp, tmp, markOopDesc::marked_value); + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2eefc71dde0..44ab44dece1 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1966,12 +1966,12 @@ encode %{ + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + // Check for existing monitor +- __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + + if (!UseHeavyMonitors) { + // Set tmp to be (markWord of object | UNLOCK_VALUE). +- __ ori(tmp, disp_hdr, markWord::unlocked_value); ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + + // Initialize the box. (Must happen before we update the object mark!) + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +@@ -1993,7 +1993,7 @@ encode %{ + // Check if the owner is self by comparing the value in the + // markWord of object (disp_hdr) with the stack pointer. + __ sub(disp_hdr, disp_hdr, sp); +- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. +@@ -2012,15 +2012,15 @@ encode %{ + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. 
+- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); + __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for +- // markWord::monitor_value so use markWord::unused_mark which has the ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- __ mv(tmp, (address)markWord::unused_mark().value()); ++ __ mv(tmp, (address)markOopDesc::unused_mark()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + __ beqz(flag, cont); // CAS success means locking succeeded +@@ -2029,9 +2029,9 @@ encode %{ + + // Recursive lock case + __ mv(flag, zr); +- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); + __ add(tmp, tmp, 1u); +- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); + + __ bind(cont); + %} +@@ -2060,7 +2060,7 @@ encode %{ + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); +- __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + + if (!UseHeavyMonitors) { +@@ -2080,8 +2080,8 @@ encode %{ + + // Handle existing monitor. + __ bind(object_has_monitor); +- STATIC_ASSERT(markWord::monitor_value <= INT_MAX); +- __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + + Label notRecursive; +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index d2a301c6e74..4e388ac4eaa 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3559,7 +3559,7 @@ void TemplateTable::_new() { + + // initialize object hader only. 
+ __ bind(initialize_header); +- __ mv(t0, (intptr_t)markWord::prototype().value()); ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + +From 4b27cd8d4cfa8fb5f0f78aecaebb17d19362f300 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 31 Mar 2023 16:24:36 +0800 +Subject: [PATCH 010/140] Revert: JDK-8239895: assert(_stack_base != 0LL) + failed: Sanity check JDK-8238988: Rename thread "in stack" methods and add + in_stack_range JDK-8234372: Investigate use of Thread::stack_base() and + queries for "in stack" JDK-8203481: Incorrect constraint for unextended_sp in + frame:safe_for_sender + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 32 +++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 9 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 41e52a4d491..8e7babe2c61 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -56,13 +56,21 @@ void RegisterMap::check_location_valid() { + // Profiling/safepoint support + + bool frame::safe_for_sender(JavaThread *thread) { +- address addr_sp = (address)_sp; +- address addr_fp = (address)_fp; ++ address sp = (address)_sp; ++ address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ + // sp must be within the usable part of the stack (not in guards) +- if (!thread->is_in_usable_stack(addr_sp)) { ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { + return false; + } + +@@ -79,14 +87,15 @@ bool frame::safe_for_sender(JavaThread *thread) { + // So unextended sp must be within the stack but we need not to check + // that unextended sp >= sp + +- if (!thread->is_in_full_stack_checked(unextended_sp)) { ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ ++ if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 +- bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && +- thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + +@@ -147,7 +156,7 @@ bool frame::safe_for_sender(JavaThread *thread) { + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? +- if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ if ((address)sender_sp >= thread->stack_base()) { + return false; + } + +@@ -163,7 +172,10 @@ bool frame::safe_for_sender(JavaThread *thread) { + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
+- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { + return false; + } + +@@ -196,7 +208,9 @@ bool frame::safe_for_sender(JavaThread *thread) { + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { +- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { + return false; + } + + +From d1b463b6c00c75664a49719f75bef8e6408f12df Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 31 Mar 2023 17:10:33 +0800 +Subject: [PATCH 011/140] Revert JDK-8173585: Intrinsify + StringLatin1.indexOf(char) + +--- + src/hotspot/cpu/riscv/riscv.ad | 19 ------------------- + 1 file changed, 19 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 44ab44dece1..8c7a8ede815 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -9826,7 +9826,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -9840,24 +9839,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + %} + + +-instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +-%{ +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); +- effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, +- TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); +- +- format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} +- ins_encode %{ +- __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- $tmp3$$Register, $tmp4$$Register, true /* isL */); +- %} +- ins_pipe(pipe_class_memory); +-%} +- + // clearing of an array + instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + %{ + +From a0cdf8dfb05dbff34d2ca23104d08ae21b2d7f70 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 12:25:36 +0800 +Subject: [PATCH 012/140] Revert JDK-8281632: riscv: Improve interpreter stack + banging, and change the register t1->t0 + +--- + .../templateInterpreterGenerator_riscv.cpp | 42 ++++--------------- + 1 file changed, 8 insertions(+), 34 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 6537b2dbd94..76ae6f89e27 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -895,42 +895,16 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract + } + + void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { +- // See more discussion in stackOverflow.hpp. 
+- +- const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? n_shadow_pages : 1; + const int page_size = os::vm_page_size(); +- const int n_shadow_pages = shadow_zone_size / page_size; +- +-#ifdef ASSERT +- Label L_good_limit; +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); +- __ bnez(t0, L_good_limit); +- __ stop("shadow zone safe limit is not initialized"); +- __ bind(L_good_limit); +- +- Label L_good_watermark; +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- __ bnez(t0, L_good_watermark); +- __ stop("shadow zone growth watermark is not initialized"); +- __ bind(L_good_watermark); +-#endif +- +- Label L_done; +- +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- __ bgtu(sp, t0, L_done); +- +- for (int p = 1; p <= n_shadow_pages; p++) { +- __ bang_stack_with_offset(p * page_size); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } +- +- // Record the new watermark, but only if the update is above the safe limit. +- // Otherwise, the next time around the check above would pass the safe limit. +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); +- __ bleu(sp, t0, L_done); +- __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- +- __ bind(L_done); + } + + // Interpreter stub for calling a native method. (asm interpreter) + +From 8db4bf1400d92c80a0adef8a5ec12adbf595c03f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 14:56:25 +0800 +Subject: [PATCH 013/140] Port aarch64 style sig handler from + os_linux_aarch64.cpp + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 224 +++++++++++++----- + 1 file changed, 168 insertions(+), 56 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index 1f46bbab0a2..db15f1946e2 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -48,7 +48,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" +-#include "signals_posix.hpp" + #include "utilities/debug.hpp" + #include "utilities/events.hpp" + #include "utilities/vmError.hpp" +@@ -172,31 +171,138 @@ NOINLINE frame os::current_frame() { + } + + // Utility functions +-bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, +- ucontext_t* uc, JavaThread* thread) { ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). 
When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } + + // decide if this trap can be handled by a stub + address stub = NULL; + +- address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { +- pc = (address) os::Posix::ucontext_get_pc(uc); +- +- address addr = (address) info->si_addr; +- +- // Make sure the high order byte is sign extended, as it may be masked away by the hardware. +- if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { +- addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); +- } ++ pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ + // check if fault address is within thread stack +- if (thread->is_in_full_stack(addr)) { +- if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { +- return true; // continue ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. 
Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } + } + } + } +@@ -212,7 +318,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); +- } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault +@@ -220,34 +326,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; +- bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); +- if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; +- if (is_unsafe_arraycopy) { +- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); +- } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } +- } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { +- // Pull a pointer to the error message out of the instruction +- // stream. +- const uint64_t *detail_msg_ptr +- = (uint64_t*)(pc + NativeInstruction::instruction_size); +- const char *detail_msg = (const char *)*detail_msg_ptr; +- const char *msg = "stop"; +- if (TraceTraps) { +- tty->print_cr("trap: %s: (SIGILL)", msg); +- } +- +- // End life with a fatal error, message and detail message and the context. +- // Note: no need to do any post-processing here (e.g. 
signal chaining) +- va_list va_dummy; +- VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); +- va_end(va_dummy); +- +- ShouldNotReachHere(); + } else if (sig == SIGFPE && +- (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, +@@ -255,42 +339,70 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && +- MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } +- } else if ((thread->thread_state() == _thread_in_vm || +- thread->thread_state() == _thread_in_native) && +- sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ +- thread->doing_unsafe_access()) { ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; +- if (UnsafeCopyMemory::contains_pc(pc)) { +- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); +- } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +- address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); +- if (addr_slow != (address)-1) { +- stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. 
++ os::block_on_serialize_page_trap(); ++ return true; ++ } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it +- if (thread != NULL) { +- thread->set_saved_exception_pc(pc); +- } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + +- os::Posix::ucontext_set_pc(uc, stub); ++ os::Linux::ucontext_set_pc(uc, stub); + return true; + } + +- return false; // Mute compiler ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler + } + + void os::Linux::init_thread_fpu_state(void) { + +From fd3897410308e2fc54d84a9bd453b1b375e6aace Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:24:57 +0800 +Subject: [PATCH 014/140] Revert: JDK-8248240: Remove extendedPC.hpp and + fetch_frame_from_ucontext JDK-8253742: POSIX signal code cleanup + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 38 ++++++++++++++----- + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 9 +++-- + 2 files changed, 33 insertions(+), 14 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index db15f1946e2..4f1c84c60a0 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -37,6 +37,7 @@ + #include "prims/jniFastGetField.hpp" + #include "prims/jvm_misc.hpp" + #include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/java.hpp" +@@ -85,11 +86,11 @@ char* os::non_memory_address_word() { + return (char*) -1; + } + +-address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; + } + +-void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; + } + +@@ -101,13 +102,29 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; + } + +-address os::fetch_frame_from_context(const void* ucVoid, +- intptr_t** ret_sp, intptr_t** ret_fp) { +- address epc; ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { +- epc = os::Posix::ucontext_get_pc(uc); ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } +@@ -115,7 +132,8 @@ address os::fetch_frame_from_context(const void* ucVoid, + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { +- epc = NULL; ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } +@@ -142,8 +160,8 @@ frame os::fetch_compiled_frame_from_context(const void* ucVoid) { + frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; +- address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); +- return frame(frame_sp, frame_fp, epc); ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); + } + + // By default, gcc always saves frame pointer rfp on this stack. This +@@ -465,7 +483,7 @@ void os::print_context(outputStream *st, const void *context) { + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+- address pc = os::Posix::ucontext_get_pc(uc); ++ address pc = os::Linux::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); + } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index 3100572e9fd..e46efc420b0 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -61,16 +61,17 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; +- address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); +- if (addr == NULL || ret_sp == NULL ) { ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + +- frame ret_frame(ret_sp, ret_fp, addr); ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(this)) { + #ifdef COMPILER2 +- frame ret_frame2(ret_sp, NULL, addr); ++ frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + +From 892b40a435ae3f7e85659100ef68db1aeda7ef23 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:33:50 +0800 +Subject: [PATCH 015/140] Revert JDK-8263002: Remove CDS MiscCode region + +--- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 ++++++++++ + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 6 ++++++ + 2 files changed, 16 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 4daed17df10..21aa3b58c09 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -187,6 +187,16 @@ bool SharedRuntime::is_wide_vector(int size) { + return false; + } + ++size_t SharedRuntime::trampoline_size() { ++ return 6 * NativeInstruction::instruction_size; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); ++ __ jalr(x0, t0, offset); ++} ++ + // The java_calling_convention describes stack locations as ideal slots on + // a frame with no abi restrictions. Since we must observe abi restrictions + // (like the placement of the register window) the slots must be biased by +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index e46efc420b0..31d9254d8ad 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -68,6 +68,12 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) + return false; + } + ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. 
++ return false; ++ } ++ + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(this)) { + #ifdef COMPILER2 + +From 945a317797bc96efe3f0717ca7258f081b96b14d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:52:43 +0800 +Subject: [PATCH 016/140] Revert JDK-8254158: Consolidate per-platform stack + overflow handling code + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 52 ++++++++++++++----- + 1 file changed, 40 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index 4f1c84c60a0..8b772892b4b 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -145,18 +145,6 @@ ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + return epc; + } + +-frame os::fetch_compiled_frame_from_context(const void* ucVoid) { +- const ucontext_t* uc = (const ucontext_t*)ucVoid; +- // In compiled code, the stack banging is performed before RA +- // has been saved in the frame. RA is live, and SP and FP +- // belong to the caller. +- intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); +- intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); +- address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] +- - NativeInstruction::instruction_size); +- return frame(frame_sp, frame_fp, frame_pc); +-} +- + frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; +@@ -164,6 +152,46 @@ frame os::fetch_frame_from_context(const void* ucVoid) { + return frame(frame_sp, frame_fp, epc.pc()); + } + ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ + // By default, gcc always saves frame pointer rfp on this stack. This + // may get turned off by -fomit-frame-pointer. 
+ frame os::get_sender_for_C_frame(frame* fr) { + +From c1a03e0a376cc2c8748d83d66b576b66ee2e6962 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 16:14:19 +0800 +Subject: [PATCH 017/140] Revert JDK-8202579: Revisit VM_Version and + VM_Version_ext for overlap and consolidation + +--- + .../cpu/riscv/vm_version_ext_riscv.cpp | 87 +++++++++++++++++++ + .../cpu/riscv/vm_version_ext_riscv.hpp | 55 ++++++++++++ + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 14 --- + 3 files changed, 142 insertions(+), 14 deletions(-) + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +new file mode 100644 +index 00000000000..6bdce51506e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +new file mode 100644 +index 00000000000..711e4aeaf68 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++ ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 2c15a834542..dd65f32277f 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -210,17 +210,3 @@ void VM_Version::c2_initialize() { + } + } + #endif // COMPILER2 +- +-void VM_Version::initialize_cpu_information(void) { +- // do nothing if cpu info has been initialized +- if (_initialized) { +- return; +- } +- +- _no_of_cores = os::processor_count(); +- _no_of_threads = _no_of_cores; +- _no_of_sockets = _no_of_cores; +- snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); +- snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); +- _initialized = true; +-} + +From 0cfdbd8595c710b71be008bb531b59acf9c4b016 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 17:16:05 +0800 +Subject: [PATCH 018/140] Revert JDK-8191278: MappedByteBuffer bulk access + memory failures are not handled gracefully + +--- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 19 ++----------------- + 1 file changed, 2 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 39416441bdf..8392b768847 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -1049,12 +1049,7 @@ class StubGenerator: public StubCodeGenerator { + __ push_reg(RegSet::of(d, count), sp); + } + +- { +- // UnsafeCopyMemory page error: continue after ucm +- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); +- UnsafeCopyMemoryMark ucmm(this, add_entry, true); +- copy_memory(aligned, s, d, count, t0, size); +- } ++ copy_memory(aligned, s, d, count, t0, size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); +@@ -1122,12 +1117,7 @@ class StubGenerator: public StubCodeGenerator { + __ push_reg(RegSet::of(d, count), sp); + } + +- { +- // UnsafeCopyMemory page error: continue after ucm +- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); +- UnsafeCopyMemoryMark ucmm(this, add_entry, true); +- copy_memory(aligned, s, d, count, t0, -size); +- } ++ copy_memory(aligned, s, d, count, t0, -size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); +@@ -3734,11 +3724,6 @@ class StubGenerator: public StubCodeGenerator { + ~StubGenerator() {} + }; // end class declaration + +-#define UCM_TABLE_MAX_ENTRIES 8 + void StubGenerator_generate(CodeBuffer* code, bool all) { +- if (UnsafeCopyMemory::_table == NULL) { +- UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); +- } +- + 
StubGenerator g(code, all); + } + +From dd6a7c520a5adeef5b6686c161554adcba61113f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 15:55:09 +0800 +Subject: [PATCH 019/140] Revert JDK-8282085: The REGISTER_DEFINITION macro is + useless after JDK-8269122 + +--- + .../cpu/riscv/register_definitions_riscv.cpp | 192 ++++++++++++++++++ + 1 file changed, 192 insertions(+) + create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +new file mode 100644 +index 00000000000..583f67573ca +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +@@ -0,0 +1,192 @@ ++/* ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" ++ ++REGISTER_DEFINITION(Register, noreg); ++ ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++ ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); ++ ++REGISTER_DEFINITION(VectorRegister, vnoreg); ++ ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); ++REGISTER_DEFINITION(VectorRegister, v14); 
++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); ++ ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); ++ ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); ++ ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); ++ ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); + +From 561261b051d88ddb0053733f03cbefc75dedcea8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:41:03 +0800 +Subject: [PATCH 020/140] Revert JDK-7175279: Don't use x87 FPU on x86-64 + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 0e383a3c139..977563fe5f4 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -2019,6 +2019,18 @@ address LIR_Assembler::int_constant(jlong n) { + } + 
} + ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + +From ff4e1443fd000208714b506d52c0fab1c91e4ac8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:41:15 +0800 +Subject: [PATCH 021/140] Revert JDK-8255909: Remove unused delayed_value + methods + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 7 +++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 16 ++++++++++++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 4 ++++ + 3 files changed, 27 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 4923962a496..44e8d4b4ff1 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -3027,6 +3027,13 @@ enum Nf { + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 269d76ba69e..878957cbede 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -191,6 +191,22 @@ void MacroAssembler::call_VM(Register oop_result, + void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} + void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); ++ ++ if (offset != 0) ++ add(tmp, tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ + // Calls to C land + // + // When entering C land, the fp, & esp of the last Java frame have to be recorded +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b59bdadb8bf..f23f7e7d1e6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -625,6 +625,10 @@ class MacroAssembler: public Assembler { + + void reserved_stack_check(); + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + +From afe35a3fdc705645bfe2a2e797a95ce1d5203872 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:51:39 
+0800 +Subject: [PATCH 022/140] Revert JDK-8263679: C1: Remove vtable call + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 977563fe5f4..a0ecc63d851 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1382,6 +1382,11 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + add_call_info(code_offset(), op->info()); + } + ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); + +From 655b34c00ec5ff6fa7e82de96a78a0c58ba91985 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:55:57 +0800 +Subject: [PATCH 023/140] Revert JDK-8264063: Outer Safepoint poll load should + not reference the head of inner strip mined loop. + +--- + src/hotspot/cpu/riscv/riscv.ad | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 8c7a8ede815..fcddf752564 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -952,6 +952,20 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const + return align_up(current_offset, alignment_required()) - current_offset; + } + ++// Indicate if the safepoint node needs the polling page as an input ++ ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. that's better for scheduling ++// anyway. ++ ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} ++ + //============================================================================= + + #ifndef PRODUCT + +From 4a6f7dafdb4e0cf054b7867de60f789d4ca1d9f3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:26:29 +0800 +Subject: [PATCH 024/140] Revert: JDK-8266810: Move trivial Matcher code to + cpu-specific header files JDK-8254966: Remove unused code from Matcher + +--- + src/hotspot/cpu/riscv/matcher_riscv.hpp | 129 ------------------------ + src/hotspot/cpu/riscv/riscv.ad | 108 +++++++++++++++++++- + 2 files changed, 107 insertions(+), 130 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +deleted file mode 100644 +index 4c7fabd7240..00000000000 +--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp ++++ /dev/null +@@ -1,129 +0,0 @@ +-/* +- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. 
+- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef CPU_RISCV_MATCHER_RISCV_HPP +-#define CPU_RISCV_MATCHER_RISCV_HPP +- +- // Defined within class Matcher +- +- // false => size gets scaled to BytesPerLong, ok. +- static const bool init_array_count_is_in_bytes = false; +- +- // riscv doesn't support misaligned vectors store/load on JDK11. +- static constexpr bool misaligned_vectors_ok() { +- return false; +- } +- +- // Whether code generation need accurate ConvI2L types. +- static const bool convi2l_type_required = false; +- +- // Does the CPU require late expand (see block.cpp for description of late expand)? +- static const bool require_postalloc_expand = false; +- +- // Do we need to mask the count passed to shift instructions or does +- // the cpu only look at the lower 5/6 bits anyway? +- static const bool need_masked_shift_count = false; +- +- static constexpr bool isSimpleConstant64(jlong value) { +- // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. +- // Probably always true, even if a temp register is required. +- return true; +- } +- +- // Use conditional move (CMOVL) +- static constexpr int long_cmove_cost() { +- // long cmoves are no more expensive than int cmoves +- return 0; +- } +- +- static constexpr int float_cmove_cost() { +- // float cmoves are no more expensive than int cmoves +- return 0; +- } +- +- // This affects two different things: +- // - how Decode nodes are matched +- // - how ImplicitNullCheck opportunities are recognized +- // If true, the matcher will try to remove all Decodes and match them +- // (as operands) into nodes. NullChecks are not prepared to deal with +- // Decodes by final_graph_reshaping(). +- // If false, final_graph_reshaping() forces the decode behind the Cmp +- // for a NullCheck. The matcher matches the Decode node into a register. +- // Implicit_null_check optimization moves the Decode along with the +- // memory operation back up before the NullCheck. +- static bool narrow_oop_use_complex_address() { +- return CompressedOops::shift() == 0; +- } +- +- static bool narrow_klass_use_complex_address() { +- return false; +- } +- +- static bool const_oop_prefer_decode() { +- // Prefer ConN+DecodeN over ConP in simple compressed oops mode. +- return CompressedOops::base() == NULL; +- } +- +- static bool const_klass_prefer_decode() { +- // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. +- return CompressedKlassPointers::base() == NULL; +- } +- +- // Is it better to copy float constants, or load them directly from +- // memory? Intel can load a float constant from a direct address, +- // requiring no extra registers. Most RISCs will have to materialize +- // an address into a register first, so they would do better to copy +- // the constant from stack. 
+- static const bool rematerialize_float_constants = false; +- +- // If CPU can load and store mis-aligned doubles directly then no +- // fixup is needed. Else we split the double into 2 integer pieces +- // and move it piece-by-piece. Only happens when passing doubles into +- // C code as the Java calling convention forces doubles to be aligned. +- static const bool misaligned_doubles_ok = true; +- +- // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. +- static const bool strict_fp_requires_explicit_rounding = false; +- +- // Are floats converted to double when stored to stack during +- // deoptimization? +- static constexpr bool float_in_double() { return false; } +- +- // Do ints take an entire long register or just half? +- // The relevant question is how the int is callee-saved: +- // the whole long is written but de-opt'ing will have to extract +- // the relevant 32 bits. +- static const bool int_in_long = true; +- +- // true means we have fast l2f convers +- // false means that conversion is done by runtime call +- static constexpr bool convL2FSupported(void) { +- return true; +- } +- +- // Implements a variant of EncodeISOArrayNode that encode ASCII only +- static const bool supports_encode_ascii_array = false; +- +-#endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index fcddf752564..a9e5f2e6841 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -330,7 +330,9 @@ alloc_class chunk2(RFLAGS); + // Several register classes are automatically defined based upon information in + // this architecture description. + // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) +-// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) + // + + // Class for all 32 bit general purpose registers +@@ -1548,6 +1550,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + return (-4096 <= offs && offs < 4096); + } + ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++} ++ ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ + // Vector width in bytes. + const int Matcher::vector_width_in_bytes(BasicType bt) { + return 0; +@@ -1567,6 +1580,94 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++// RISC-V supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; ++} ++ ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? 
++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. ++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} ++ ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++ ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = true; ++ ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} ++ ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } ++ ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; ++ + // Return whether or not this register is ever used as an argument. + // This function is used on startup to build the trampoline stubs in + // generateOptoStub. Registers not mentioned will be killed by the VM +@@ -1671,6 +1772,8 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + return true; + } + ++const bool Matcher::convi2l_type_required = false; ++ + // Should the Matcher clone input 'm' of node 'n'? + bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + assert_cond(m != NULL); +@@ -2250,6 +2353,9 @@ frame %{ + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R31); + ++ // Method Oop Register when calling interpreter. 
++ interpreter_method_oop_reg(R31); ++ + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + +From 4b0f20882cd9b5e5da92d61c2fa02e0cbea0ef0c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:30:42 +0800 +Subject: [PATCH 025/140] Revert JDK-8256238: Remove + Matcher::pass_original_key_for_aes + +--- + src/hotspot/cpu/riscv/riscv.ad | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a9e5f2e6841..0d1afd5584a 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ + // RISC-V supports misaligned vectors store/load. + const bool Matcher::misaligned_vectors_ok() { + return true; + +From 36d7ecedbcd95911d1b355bbab3e8fdf81b36e7d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:42:37 +0800 +Subject: [PATCH 026/140] Revert JDK-8242492: C2: Remove + Matcher::vector_shift_count_ideal_reg() + +--- + src/hotspot/cpu/riscv/riscv.ad | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 0d1afd5584a..c10e91633a5 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ + // AES support not yet implemented + const bool Matcher::pass_original_key_for_aes() { + return false; + +From b78e448a460fcdc66553e66342e93e5ac87c0c61 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:47:13 +0800 +Subject: [PATCH 027/140] Revert JDK-8266937: Remove Compile::reshape_address + +--- + src/hotspot/cpu/riscv/riscv.ad | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c10e91633a5..2c5ec0451b8 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1801,6 +1801,9 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, + return clone_base_plus_offset_address(m, mstack, address_visited); + } + ++void Compile::reshape_address(AddPNode* addp) { ++} ++ + %} + + + +From cd34a5ce5d120cdac939217976d1e7b7e98bf654 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:49:09 +0800 +Subject: [PATCH 028/140] Revert JDK-8272771: frame::pd_ps() is not implemented + on any platform + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 8e7babe2c61..8e4f20fe561 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -683,6 +683,7 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { + init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); + } + ++void frame::pd_ps() {} + #endif + + void JavaFrameAnchor::make_walkable(JavaThread* thread) { + +From bdb16daf6d809d0c38256be99ecbe922d24b889b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:56:27 +0800 +Subject: [PATCH 029/140] Revert JDK-8268858: Determine register pressure + 
automatically by the number of available registers for allocation + +--- + src/hotspot/cpu/riscv/riscv.ad | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2c5ec0451b8..a6aa52de29e 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1527,6 +1527,10 @@ const bool Matcher::has_predicated_vectors(void) { + return false; + } + ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; ++} ++ + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then + +From bbaa7a97b5d8110ead9dc44f31e2c5fe3bcd83d5 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:58:16 +0800 +Subject: [PATCH 030/140] Revert JDK-8253040: Remove unused + Matcher::regnum_to_fpu_offset() + +--- + src/hotspot/cpu/riscv/riscv.ad | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a6aa52de29e..2d847cb6454 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1531,6 +1531,12 @@ const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; + } + ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ Unimplemented(); ++ return 0; ++} ++ + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then + +From ce9ad0af72e405153534369bff1b1725697f3e40 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 18:03:23 +0800 +Subject: [PATCH 031/140] Revert JDK-8254084: Remove + TemplateTable::pd_initialize + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 4e388ac4eaa..c9d399ccdaf 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -48,6 +48,12 @@ + + #define __ _masm-> + ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No RISC-V specific initialization ++} ++ + // Address computation: local variables + + static inline Address iaddress(int n) { + +From 49429187846e6f2b00ab2853e27097eae274a947 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 20:17:07 +0800 +Subject: [PATCH 032/140] Revert JDK-8224815: 8224815: Remove non-GC uses of + CollectedHeap::is_in_reserved() + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 878957cbede..cf01d7d74bb 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1632,7 +1632,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + #ifdef ASSERT + { + ThreadInVMfromUnknown tiv; +- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + #endif + oop_index = oop_recorder()->find_index(obj); +@@ -2800,7 +2800,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert (UseCompressedOops, "should 
only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); +- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + #endif + int oop_index = oop_recorder()->find_index(obj); +@@ -2815,7 +2815,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(k); +- assert(!Universe::heap()->is_in(k), "should not be an oop"); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + +From a71fabb1ff05db9955557a888be6cd1b5f87deea Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 21:14:30 +0800 +Subject: [PATCH 033/140] Revert JDK-8253540: InterpreterRuntime::monitorexit + should be a JRT_LEAF function + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 8adc7b1320d..48957803fdc 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -839,7 +839,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); + + if (UseHeavyMonitors) { +- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + } else { + Label done; + +@@ -871,7 +873,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + + // Call the runtime routine for slow case. 
+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj +- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + + bind(done); + + +From a0b18eea3c83ef8f1de2c1b3cd55452f0f6b9af2 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 7 Apr 2023 12:51:33 +0800 +Subject: [PATCH 034/140] Revert JDK-8278387: riscv: Implement UseHeavyMonitors + consistently && JDK-8279826: riscv: Preserve result in native wrapper with + +UseHeavyMonitors + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +- + src/hotspot/cpu/riscv/riscv.ad | 92 +++++++++---------- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 85 ++++++++--------- + 3 files changed, 80 insertions(+), 105 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index a0ecc63d851..dd657963438 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -367,11 +367,7 @@ int LIR_Assembler::emit_unwind_handler() { + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); +- if (UseHeavyMonitors) { +- __ j(*stub->entry()); +- } else { +- __ unlock_object(x15, x14, x10, *stub->entry()); +- } ++ __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + +@@ -1512,7 +1508,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); +- if (UseHeavyMonitors) { ++ if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2d847cb6454..29027d594a0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2109,40 +2109,36 @@ encode %{ + __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + +- if (!UseHeavyMonitors) { +- // Set tmp to be (markWord of object | UNLOCK_VALUE). +- __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); +- +- // Initialize the box. (Must happen before we update the object mark!) +- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- +- // Compare object markWord with an unlocked value (tmp) and if +- // equal exchange the stack address of our box with object markWord. +- // On failure disp_hdr contains the possibly locked markWord. +- __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/disp_hdr); +- __ mv(flag, zr); +- __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas +- +- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +- +- // If the compare-and-exchange succeeded, then we found an unlocked +- // object, will have now locked it will continue at label cont +- // We did not see an unlocked object so try the fast recursive case. +- +- // Check if the owner is self by comparing the value in the +- // markWord of object (disp_hdr) with the stack pointer. 
+- __ sub(disp_hdr, disp_hdr, sp); +- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); +- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, +- // hence we can store 0 as the displaced header in the box, which indicates that it is a +- // recursive lock. +- __ andr(tmp/*==0?*/, disp_hdr, tmp); +- __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- __ mv(flag, tmp); // we can use the value of tmp as the result here +- } else { +- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path +- } ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); ++ ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here + + __ j(cont); + +@@ -2189,31 +2185,25 @@ encode %{ + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + +- if (!UseHeavyMonitors) { +- // Find the lock address and load the displaced header from the stack. +- __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- // If the displaced header is 0, we have a recursive unlock. +- __ mv(flag, disp_hdr); +- __ beqz(disp_hdr, cont); +- } ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); + __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + +- if (!UseHeavyMonitors) { +- // Check if it is still a light weight lock, this is true if we +- // see the stack address of the basicLock in the markWord of the +- // object. ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. 
+ +- __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, +- Assembler::rl, /*result*/tmp); +- __ xorr(flag, box, tmp); // box == tmp if cas succeeds +- } else { +- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path +- } ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 21aa3b58c09..5203200b068 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1488,39 +1488,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + +- if (!UseHeavyMonitors) { +- // Load (object->mark() | 1) into swap_reg % x10 +- __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); +- __ ori(swap_reg, t0, 1); +- +- // Save (object->mark() | 1) into BasicLock's displaced header +- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); +- +- // src -> dest if dest == x10 else x10 <- dest +- { +- Label here; +- __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); +- } ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + +- // Test if the oopMark is an obvious stack pointer, i.e., +- // 1) (mark & 3) == 0, and +- // 2) sp <= mark < mark + os::pagesize() +- // These 3 tests can be done by evaluating the following +- // expression: ((mark - sp) & (3 - os::vm_page_size())), +- // assuming both stack pointer and pagesize have their +- // least significant 2 bits clear. +- // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg +- +- __ sub(swap_reg, swap_reg, sp); +- __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); +- +- // Save the test result, for recursive case, the result is zero +- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); +- __ bnez(swap_reg, slow_path_lock); +- } else { +- __ j(slow_path_lock); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); + } + ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ + // Slow path will re-enter here + __ bind(lock_done); + } +@@ -1608,31 +1604,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + Label done; + +- if (!UseHeavyMonitors) { +- // Simple recursive lock? +- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); +- __ beqz(t0, done); +- } +- ++ // Simple recursive lock? ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + + // Must save x10 if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + +- if (!UseHeavyMonitors) { +- // get address of the stack lock +- __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); +- // get old displaced header +- __ ld(old_hdr, Address(x10, 0)); ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + +- // Atomic swap old header if oop still contains the stack lock +- Label succeed; +- __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); +- __ bind(succeed); +- } else { +- __ j(slow_path_unlock); +- } ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + +From 1e844b8019cb3516c0843826de2bd3fcd2222f41 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 16:49:19 +0800 +Subject: [PATCH 035/140] Revert JDK-8258192: Obsolete the CriticalJNINatives + flag. CriticalJNINatives is unimplemented() even on AArch64. See + https://bugs.openjdk.org/browse/JDK-8254694. 
+ +Also following up 8191129: AARCH64: Invalid value passed to critical JNI function +--- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 3 ++- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ + .../criticalnatives/argumentcorruption/CheckLongArgs.java | 2 +- + .../jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java | 2 +- + 4 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 5203200b068..f8585afbdc2 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1111,7 +1111,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, +- BasicType ret_type) { ++ BasicType ret_type, ++ address critical_entry) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index dd65f32277f..c0491d23fa6 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -147,6 +147,8 @@ void VM_Version::initialize() { + #ifdef COMPILER2 + c2_initialize(); + #endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); + } + + #ifdef COMPILER2 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d25..2c866f26f08 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f93113..1da369fde23 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; + +From 58ad930e78501c6fad024e7ef05066ec19eb6219 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 11:45:04 +0800 +Subject: [PATCH 036/140] 8202976: Add C1 lea patching support for x86 (RISC-V + part) + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index dd657963438..46a20a64194 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1818,6 
+1818,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C + return; + } + ++ assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + +From 2074b8ec0ea3562f3999b4f4010b3f5b57dbe502 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 12:15:44 +0800 +Subject: [PATCH 037/140] Revert 8232365: Implementation for JEP 363: Remove + the Concurrent Mark Sweep (CMS) Garbage Collector + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 3 +++ + src/hotspot/cpu/riscv/riscv.ad | 27 +++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 845064d6cbc..50bbb6a77b8 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -63,6 +63,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); + + define_pd_global(bool, PreserveFramePointer, false); + ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ + define_pd_global(uintx, TypeProfileLevel, 111); + + define_pd_global(bool, CompactStrings, true); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 29027d594a0..386ef731696 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -752,6 +752,9 @@ bool is_CAS(int opcode, bool maybe_volatile); + // predicate controlling translation of CompareAndSwapX + bool needs_acquiring_load_reserved(const Node *load); + ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); ++ + // predicate controlling addressing modes + bool size_fits_all_mem_uses(AddPNode* addp, int shift); + %} +@@ -874,6 +877,29 @@ bool needs_acquiring_load_reserved(const Node *n) + // so we can just return true here + return true; + } ++ ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false ++ ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; ++} ++ + #define __ _masm. 
+ + // advance declarations for helper functions to convert register +@@ -4566,6 +4592,7 @@ instruct loadConD0(fRegD dst, immD0 con) %{ + instruct storeimmCM0(immI0 zero, memory mem) + %{ + match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + + ins_cost(STORE_COST); + format %{ "storestore (elided)\n\t" + +From f838cf41b48c6bc17d052531ab5594de236b1302 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 22:06:58 +0800 +Subject: [PATCH 038/140] Revert 8220051: Remove global safepoint code + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 3 +- + .../cpu/riscv/macroAssembler_riscv.cpp | 26 ++++++++++- + .../cpu/riscv/macroAssembler_riscv.hpp | 3 +- + src/hotspot/cpu/riscv/riscv.ad | 43 +++++++++++++++++++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 +- + .../templateInterpreterGenerator_riscv.cpp | 2 +- + 6 files changed, 75 insertions(+), 6 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 48957803fdc..74dded77d19 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -515,7 +515,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); +- bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index cf01d7d74bb..73629e3dba3 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -264,6 +264,30 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, + } + } + ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. 
++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} ++ + void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); +@@ -2137,7 +2161,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, + bind(L_failure); + } + +-void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { ++void MacroAssembler::safepoint_poll(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(t1, Address(xthread, Thread::polling_page_offset())); + andi(t0, t1, SafepointMechanism::poll_bit()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index f23f7e7d1e6..8a2c6e07d88 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -44,7 +44,8 @@ class MacroAssembler: public Assembler { + } + virtual ~MacroAssembler() {} + +- void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); + + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 386ef731696..2dde4453dac 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1213,6 +1213,14 @@ const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); + } + ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? 
++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; ++} ++ + //============================================================================= + + // Figure out which register class each belongs in: rc_int, rc_float or +@@ -1907,6 +1915,17 @@ encode %{ + __ li(dst_reg, 1); + %} + ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); ++ %} ++ + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ + C2_MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); +@@ -2688,6 +2707,17 @@ operand immP_1() + interface(CONST_INTER); + %} + ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ + // Card Table Byte Map Base + operand immByteMapBase() + %{ +@@ -4476,6 +4506,19 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) + ins_pipe(ialu_imm); + %} + ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++ ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ + // Load Byte Map Base Constant + instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) + %{ +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index f8585afbdc2..c501c8f7bac 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1573,7 +1573,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. + +- __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); + __ bind(safepoint_in_progress_done); +@@ -2439,7 +2439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + __ bind(noException); + + Label no_adjust, bail; +- if (!cause_return) { ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { + // If our stashed return pc was modified by the runtime we avoid touching it + __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); + __ bne(x18, t0, no_adjust); +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 76ae6f89e27..2d4baab2ab7 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1143,7 +1143,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. 
+- __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); + +From 13faeae35312c59a1366d4f9c84da7157f06efc7 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 22:15:14 +0800 +Subject: [PATCH 039/140] Revert 8253180: ZGC: Implementation of JEP 376: ZGC: + Concurrent Thread-Stack Processing + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 8 ++------ + src/hotspot/cpu/riscv/frame_riscv.hpp | 3 --- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 -------- + .../cpu/riscv/templateInterpreterGenerator_riscv.cpp | 9 --------- + 5 files changed, 2 insertions(+), 27 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 8e4f20fe561..b056eb2488a 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -495,8 +495,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { + } + + //------------------------------------------------------------------------------ +-// frame::sender_raw +-frame frame::sender_raw(RegisterMap* map) const { ++// frame::sender ++frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + assert(map != NULL, "map must be set"); +@@ -521,10 +521,6 @@ frame frame::sender_raw(RegisterMap* map) const { + return frame(sender_sp(), link(), sender_pc()); + } + +-frame frame::sender(RegisterMap* map) const { +- return sender_raw(map); +-} +- + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +index c06aaa9e391..3b88f6d5a1a 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -196,7 +196,4 @@ + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +- // returns the sending frame, without applying any barriers +- frame sender_raw(RegisterMap* map) const; +- + #endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 74dded77d19..4e642af87c4 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -571,7 +571,6 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + + // remove activation + // +-// Apply stack watermark barrier. + // Unlock the receiver if this is a synchronized method. + // Unlock any Java monitors from syncronized blocks. + // Remove the activation from the stack. +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index c501c8f7bac..d740c99c979 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1565,14 +1565,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + // check for safepoint operation in progress and/or pending suspend requests + { +- // We need an acquire here to ensure that any subsequent load of the +- // global SafepointSynchronize::_state flag is ordered after this load +- // of the thread-local polling word. 
We don't want this poll to +- // return false (i.e. not safepointing) and a later poll of the global +- // SafepointSynchronize::_state spuriously to return true. +- // This is to avoid a race when we're in a native->Java transition +- // racing the code which wakes up from a safepoint. +- + __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 2d4baab2ab7..a07dea35b73 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1134,15 +1134,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; +- +- // We need an acquire here to ensure that any subsequent load of the +- // global SafepointSynchronize::_state flag is ordered after this load +- // of the thread-local polling word. We don't want this poll to +- // return false (i.e. not safepointing) and a later poll of the global +- // SafepointSynchronize::_state spuriously to return true. +- // +- // This is to avoid a race when we're in a native->Java transition +- // racing the code which wakes up from a safepoint. + __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + +From 99ca43f1e7e74f161b40466f49fc61aa734d334d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 12 Apr 2023 12:35:33 +0800 +Subject: [PATCH 040/140] JDK-8243155: AArch64: Add support for SqrtVF + +--- + src/hotspot/cpu/riscv/riscv.ad | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2dde4453dac..9da8a76c190 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7206,7 +7206,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ + %} + + instruct sqrtF_reg(fRegF dst, fRegF src) %{ +- match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ match(Set dst (SqrtF src)); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} + +From 4bbd814dfbc33d3f1277dbb64f19a18f9f8c1a81 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 12 Apr 2023 15:11:49 +0800 +Subject: [PATCH 041/140] Revert JDK-8267098: AArch64: C1 StubFrames end + confusingly + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 52 ++++++++++----------- + 1 file changed, 24 insertions(+), 28 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index f523c9ed50a..1f58bde4df5 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -167,19 +167,14 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres + return call_RT(oop_result, metadata_result, entry, arg_num); + } + +-enum return_state_t { +- does_not_return, requires_return +-}; +- + // Implementation of StubFrame + + class StubFrame: public StackObj { + private: + StubAssembler* _sasm; +- bool _return_state; + + public: +- StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int 
offset_in_words, Register reg); + + ~StubFrame(); +@@ -197,9 +192,8 @@ void StubAssembler::epilogue() { + + #define __ _sasm-> + +-StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; +- _return_state = return_state; + __ prologue(name, must_gc_arguments); + } + +@@ -211,11 +205,7 @@ void StubFrame::load_argument(int offset_in_words, Register reg) { + + + StubFrame::~StubFrame() { +- if (_return_state == requires_return) { +- __ epilogue(); +- } else { +- __ should_not_reach_here(); +- } ++ __ epilogue(); + _sasm = NULL; + } + +@@ -378,6 +368,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + ++ __ should_not_reach_here(); + return oop_maps; + } + +@@ -425,7 +416,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + sasm->set_frame_size(frame_size); + break; + } +- default: ShouldNotReachHere(); ++ default: ++ __ should_not_reach_here(); ++ break; + } + + // verify that only x10 and x13 are valid at this time +@@ -481,6 +474,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler + break; + default: ShouldNotReachHere(); + } +@@ -641,13 +637,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_div0_exception_id: + { +- StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: +- { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; +@@ -926,14 +922,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_class_cast_exception_id: + { +- StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { +- StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } +@@ -1027,7 +1023,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case deoptimize_id: + { +- StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); +@@ -1046,7 +1042,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + 
+ case throw_range_check_failed_id: + { +- StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; +@@ -1062,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case access_field_patching_id: + { +- StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } +@@ -1070,7 +1066,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_klass_patching_id: + { +- StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } +@@ -1078,7 +1074,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_mirror_patching_id: + { +- StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } +@@ -1086,7 +1082,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_appendix_patching_id: + { +- StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } +@@ -1109,14 +1105,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_index_exception_id: + { +- StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { +- StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); +@@ -1125,7 +1121,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case predicate_failed_trap_id: + { +- StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); +@@ -1156,7 +1152,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + default: + { +- StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + +From eb37cfd42e7801c5ce64666c3cd25d40cfb22e76 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: 
Wed, 12 Apr 2023 18:06:40 +0800 +Subject: [PATCH 042/140] Revert JDK-8247691: [aarch64] Incorrect handling of + VM exceptions in C1 deopt stub/traps + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 87 +++++++++++++++------ + 1 file changed, 65 insertions(+), 22 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index 1f58bde4df5..1f45fba9de0 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -581,37 +581,80 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + #endif + __ reset_last_Java_frame(true); + +-#ifdef ASSERT +- // Check that fields in JavaThread for exception oop and issuing pc are empty +- Label oop_empty; +- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); +- __ beqz(t0, oop_empty); +- __ stop("exception oop must be empty"); +- __ bind(oop_empty); ++ // check for pending exceptions ++ { Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler + +- Label pc_empty; +- __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); +- __ beqz(t0, pc_empty); +- __ stop("exception pc must be empty"); +- __ bind(pc_empty); ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. ++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ ++#ifdef ASSERT ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); + #endif + +- // Runtime will return true if the nmethod has been deoptimized, this is the +- // expected scenario and anything else is an error. Note that we maintain a +- // check on the result purely as a defensive measure. +- Label no_deopt; +- __ beqz(x10, no_deopt); // Have we deoptimized? ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); + +- // Perform a re-execute. The proper return address is already on the stack, +- // we just need to restore registers, pop all of our frames but the return +- // address and jump to the deopt blob. ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. 
++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } ++ ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? ++ ++ // Will reexecute. Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob + + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + +- __ bind(no_deopt); +- __ stop("deopt not performed"); ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); + + return oop_maps; + } + +From 3fa279b459fffd1bd1ce158a7fdaa9d8704450a8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:29:27 +0800 +Subject: [PATCH 043/140] Revert JDK-8212681: Refactor IC locking to use a fine + grained CompiledICLocker + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 3 +-- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 75bc4be7840..4d1687301fc 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -113,10 +113,10 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad + } + + void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); +- assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index 0a05c577860..459683735e9 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -146,8 +146,7 @@ address NativeCall::destination() const { + // during code generation, where no patching lock is needed. + void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || +- (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || +- CompiledICLocker::is_safe(addr_at(0)), ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + +From 727f1a8f9b4a6dfbb0cf2002f12b86b5d5f23362 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:36:11 +0800 +Subject: [PATCH 044/140] Revert JDK-8225681: + vmTestbase/nsk/jvmti/RedefineClasses/StressRedefine fails due a) MT-unsafe + modification of inline cache + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 4d1687301fc..0b13e44c8d6 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -99,10 +99,15 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +-#ifdef ASSERT ++#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + +- verify_mt_safe(callee, entry, method_holder, jump); ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); + #endif + // Update stub. + method_holder->set_data((intptr_t)callee()); + +From 26e37551ecc41db0cf8eeb775a5501b4f45b4ffa Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:39:52 +0800 +Subject: [PATCH 045/140] Revert JDK-8232046: AArch64 build failure after + JDK-8225681 + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 -- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 19 ++++--------------- + 2 files changed, 4 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 0b13e44c8d6..1cfc92b28fa 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -126,8 +126,6 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + method_holder->set_data(0); +- NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +- jump->set_jump_destination((address)-1); + } + + //----------------------------------------------------------------------------- +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index 459683735e9..bfe84fa4e30 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -272,15 +272,9 @@ address NativeJump::jump_destination() const { + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about +- // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) +- // i.e. jump to 0 when we need leave space for a wide immediate +- // load +- +- // return -1 if jump to self or to 0 +- if ((dest == (address) this) || dest == 0) { +- dest = (address) -1; +- } + ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; + return dest; + }; + +@@ -302,14 +296,9 @@ address NativeGeneralJump::jump_destination() const { + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about +- // As a special case we also use jump to 0 when first generating +- // a general jump +- +- // return -1 if jump to self or to 0 +- if ((dest == (address) this) || dest == 0) { +- dest = (address) -1; +- } + ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? 
(address) -1 : dest; + return dest; + } + + +From 4fc68bc3cd13e623276965947d6c8cb14da15873 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:47:08 +0800 +Subject: [PATCH 046/140] Revert JDK-8213084: Rework and enhance + Print[Opto]Assembly output + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 -------- + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 20 -------------------- + 2 files changed, 28 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 44e8d4b4ff1..b4e7287ce08 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -268,14 +268,6 @@ class Assembler : public AbstractAssembler { + + enum { instruction_size = 4 }; + +- //---< calculate length of instruction >--- +- // We just use the values set above. +- // instruction must start at passed address +- static unsigned int instr_len(unsigned char *instr) { return instruction_size; } +- +- //---< longest instructions >--- +- static unsigned int instr_maxlen() { return instruction_size; } +- + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +index b0e5560c906..06bca5298cd 100644 +--- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -35,24 +35,4 @@ static const char* pd_cpu_opts() { + return ""; + } + +-// Returns address of n-th instruction preceding addr, +-// NULL if no preceding instruction can be found. +-// On riscv, we assume a constant instruction length. +-// It might be beneficial to check "is_readable" as we do on ppc and s390. +-static address find_prev_instr(address addr, int n_instr) { +- return addr - Assembler::instruction_size * n_instr; +-} +- +-// special-case instruction decoding. +-// There may be cases where the binutils disassembler doesn't do +-// the perfect job. In those cases, decode_instruction0 may kick in +-// and do it right. +-// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" +-static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { +- return here; +-} +- +-// platform-specific instruction annotations (like value of loaded constants) +-static void annotate(address pc, outputStream* st) {} +- + #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP + +From f660c594eccb174c9779ebdc9ba40fe579aa50cc Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 19:44:28 +0800 +Subject: [PATCH 047/140] Revert JDK-8241909: Remove useless code cache lookup + in frame::patch_pc + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index b056eb2488a..d03adc0bff4 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -270,7 +270,6 @@ bool frame::safe_for_sender(JavaThread *thread) { + } + + void frame::patch_pc(Thread* thread, address pc) { +- assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", +@@ -280,6 +279,7 @@ void frame::patch_pc(Thread* thread, address pc) { + // patch in the same address that's already there. 
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + +From 0d1ed436d9b70c9244c5de42fb492bbfa5e785e8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 21:10:06 +0800 +Subject: [PATCH 048/140] Revert JDK-8277411: C2 fast_unlock intrinsic on + AArch64 has unnecessary ownership check & JDK-8277180: Intrinsify recursive + ObjectMonitor locking for C2 x64 and A64 + +--- + src/hotspot/cpu/riscv/riscv.ad | 24 ++++-------------------- + 1 file changed, 4 insertions(+), 20 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 9da8a76c190..c0fbda4f3f9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2204,16 +2204,6 @@ encode %{ + __ mv(tmp, (address)markOopDesc::unused_mark()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- __ beqz(flag, cont); // CAS success means locking succeeded +- +- __ bne(flag, xthread, cont); // Check for recursive locking +- +- // Recursive lock case +- __ mv(flag, zr); +- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); +- __ add(tmp, tmp, 1u); +- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); +- + __ bind(cont); + %} + +@@ -2257,18 +2247,12 @@ encode %{ + __ bind(object_has_monitor); + STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); + __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + +- Label notRecursive; +- __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. +- +- // Recursive lock +- __ addi(disp_hdr, disp_hdr, -1); +- __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); +- __ mv(flag, zr); +- __ j(cont); +- +- __ bind(notRecursive); + __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); + __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. + +From cac7117dfc03023a81030e274944921df07bbead Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 21:13:21 +0800 +Subject: [PATCH 049/140] Revert JDK-8210381: Obsolete EmitSync + +--- + src/hotspot/cpu/riscv/riscv.ad | 100 ++++++++++++++++++++------------- + 1 file changed, 60 insertions(+), 40 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c0fbda4f3f9..c3ef648b21d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2150,9 +2150,17 @@ encode %{ + // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + ++ // Always do locking in runtime. 
++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ + // Check for existing monitor +- __ andi(t0, disp_hdr, markOopDesc::monitor_value); +- __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + + // Set tmp to be (markWord of object | UNLOCK_VALUE). + __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); +@@ -2185,24 +2193,26 @@ encode %{ + __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + __ mv(flag, tmp); // we can use the value of tmp as the result here + +- __ j(cont); +- +- // Handle existing monitor. +- __ bind(object_has_monitor); +- // The object's monitor m is unlocked iff m->owner == NULL, +- // otherwise m->owner may contain a thread or a stack address. +- // +- // Try to CAS m->owner from NULL to current thread. +- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); +- __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) +- +- // Store a non-null value into the box to avoid looking like a re-entrant +- // lock. The fast-path monitor unlock code checks for +- // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the +- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- __ mv(tmp, (address)markOopDesc::unused_mark()); +- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); ++ ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } + + __ bind(cont); + %} +@@ -2220,6 +2230,12 @@ encode %{ + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +@@ -2228,9 +2244,11 @@ encode %{ + __ beqz(disp_hdr, cont); + + // Handle existing monitor. 
+- __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); +- __ andi(t0, disp_hdr, markOopDesc::monitor_value); +- __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + + // Check if it is still a light weight lock, this is true if we + // see the stack address of the basicLock in the markWord of the +@@ -2244,23 +2262,25 @@ encode %{ + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. +- __ bind(object_has_monitor); +- STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); +- __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor +- __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); +- __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); +- __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. +- __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions +- __ bnez(flag, cont); +- +- __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); +- __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); +- __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. +- __ bnez(flag, cont); +- // need a release store here +- __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); +- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); +- __ sd(zr, Address(tmp)); // set unowned ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); ++ ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + + __ bind(cont); + %} + +From ca7ab86ee886233651e1a79faff631fd7e226d57 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 22:07:21 +0800 +Subject: [PATCH 050/140] Revert JDK-8256425: Obsolete Biased Locking in JDK 18 + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 2 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 +- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 35 ++- + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 3 +- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 27 ++- + .../cpu/riscv/macroAssembler_riscv.cpp | 217 ++++++++++++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 28 +++ + src/hotspot/cpu/riscv/riscv.ad | 12 + + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 + + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 8 +- + 11 files changed, 341 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index b4e7287ce08..51aa052a0c7 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -3043,4 +3043,6 @@ enum Nf { + virtual ~Assembler() {} + }; + ++class BiasedLockingCounters; ++ + #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 46a20a64194..6a961ee2307 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1511,9 +1511,13 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible +- int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index e126f148cdf..c45a75b2301 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -277,6 +277,11 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { +@@ -285,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); +- monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, ++ monitor_enter(obj.result(), lock, 
syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); + } + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 2d52343587e..e486f41948e 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -35,6 +35,7 @@ + #include "oops/arrayOop.hpp" + #include "oops/markWord.hpp" + #include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/os.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" +@@ -50,7 +51,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + } + } + +-int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); +@@ -62,7 +63,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + +- null_check_offset = offset(); ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); +@@ -98,6 +104,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ add_memory_int32(Address(t1, 0), 1); ++ } + return null_check_offset; + } + +@@ -107,13 +117,21 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); +- // load object +- ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to +@@ -140,8 +158,13 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i + + void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); +- // This assumes that all prototype bits fitr in an int32_t +- mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ if (UseBiasedLocking && !len->is_valid()) { ++ 
assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +index dfd3c17d7c7..1950cee5dd5 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +@@ -59,8 +59,9 @@ using MacroAssembler::null_check; + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information +- int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 4e642af87c4..f0c249f0d26 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -39,6 +39,7 @@ + #include "prims/jvmtiExport.hpp" + #include "prims/jvmtiThreadState.hpp" + #include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" +@@ -782,6 +783,10 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); ++ } ++ + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); +@@ -792,7 +797,17 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + +- cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and +@@ -809,6 +824,12 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); ++ ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } + beqz(swap_reg, done); + + bind(slow_case); +@@ -861,6 +882,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + // Free entry + sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + ++ if (UseBiasedLocking) { ++ 
biased_locking_exit(obj_reg, header_reg, done); ++ } ++ + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 73629e3dba3..e557a134b5b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -41,6 +41,7 @@ + #include "oops/compressedOops.inline.hpp" + #include "oops/klass.inline.hpp" + #include "oops/oop.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/jniHandles.inline.hpp" + #include "runtime/sharedRuntime.hpp" +@@ -2791,6 +2792,222 @@ void MacroAssembler::reserved_stack_check() { + bind(no_reserved_zone_enabling); + } + ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); ++ ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); ++ } else { ++ beqz(tmp_reg, done); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. 
We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); ++ ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); ++} ++ + // Move the address of the polling page into dest. + void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 8a2c6e07d88..c1ffa120774 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -47,6 +47,32 @@ class MacroAssembler: public Assembler { + void safepoint_poll(Label& slow_path); + void safepoint_poll_acquire(Label& slow_path); + ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). 
If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); ++ } ++ + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); + +@@ -225,6 +251,8 @@ class MacroAssembler: public Assembler { + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + ++ void load_prototype_header(Register dst, Register src); ++ + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (linke NULL) into a Register by giving + // the compiler two choices it can't resolve +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c3ef648b21d..c2a0be140e9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2156,6 +2156,10 @@ encode %{ + return; + } + ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ } ++ + // Check for existing monitor + if ((EmitSync & 0x02) == 0) { + __ andi(t0, disp_hdr, markOopDesc::monitor_value); +@@ -2236,6 +2240,10 @@ encode %{ + return; + } + ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); ++ } ++ + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +@@ -4961,6 +4969,10 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla + ins_pipe(pipe_serial); + %} + ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. 
+ instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) + %{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index d740c99c979..eaefcc2b595 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1489,6 +1489,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); ++ } ++ + // Load (object->mark() | 1) into swap_reg % x10 + __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ ori(swap_reg, t0, 1); +@@ -1597,6 +1601,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + Label done; + ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } ++ + // Simple recursive lock? + __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ beqz(t0, done); +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index c9d399ccdaf..1e23fb4dc09 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3563,9 +3563,13 @@ void TemplateTable::_new() { + __ bnez(x13, loop); + } + +- // initialize object hader only. ++ // initialize object header only. + __ bind(initialize_header); +- __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ if (UseBiasedLocking) { ++ __ ld(t0, Address(x14, Klass::prototype_header_offset())); ++ } else { ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ } + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + +From 864e551505bb816f3dc8a3bd1b065328ba7b5d65 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 17 Apr 2023 19:52:44 +0800 +Subject: [PATCH 051/140] Revert JDK-8227680: FastJNIAccessors: Check for JVMTI + field access event requests at runtime + +--- + .../cpu/riscv/jniFastGetField_riscv.cpp | 32 ++++--------------- + 1 file changed, 6 insertions(+), 26 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +index 814ed23e471..f6e7351c4fc 100644 +--- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + // An even value means there are no ongoing safepoint operations + __ andi(t0, rcounter, 1); + __ bnez(t0, slow); +- +- if (JvmtiExport::can_post_field_access()) { +- // Using barrier to order wrt. JVMTI check and load of result. +- __ membar(MacroAssembler::LoadLoad); +- +- // Check to see if a field access watch has been set before we +- // take the fast path. +- int32_t offset2; +- __ la_patchable(result, +- ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), +- offset2); +- __ lwu(result, Address(result, offset2)); +- __ bnez(result, slow); +- +- __ mv(robj, c_rarg1); +- } else { +- // Using address dependency to order wrt. load of result. 
+- __ xorr(robj, c_rarg1, rcounter); +- __ xorr(robj, robj, rcounter); // obj, since +- // robj ^ rcounter ^ rcounter == robj +- // robj is address dependent on rcounter. +- } ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + + // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +@@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + default: ShouldNotReachHere(); + } + +- // Using acquire: Order JVMTI check and load of result wrt. succeeding check +- // (LoadStore for volatile field). +- __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +- ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); + __ lw(t0, safepoint_counter_addr); + __ bne(rcounter, t0, slow); + + +From b822b64cb6be38cb7806fda3d56675674557c163 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 18 Apr 2023 16:34:32 +0800 +Subject: [PATCH 052/140] Revert JDK-8249768: Move static oops and + NullPointerException oops from Universe into OopStorage + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 1e23fb4dc09..fbcdcf60d9c 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -411,7 +411,6 @@ void TemplateTable::fast_aldc(bool wide) + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); +- __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); + +From c82c482aa065ffd39eab6b87a0ad6c6cbca1e3af Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 18 Apr 2023 16:58:23 +0800 +Subject: [PATCH 053/140] Revert JDK-8217998: Remove method_type field + associated with the appendix field of an indy or method handle call + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index fbcdcf60d9c..158294f7436 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3192,6 +3192,7 @@ void TemplateTable::prepare_invoke(int byte_no, + // since the parameter_size includes it. + __ push_reg(x9); + __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
+ +From 3e50d62dd06c3f8bc586e3ab2b00f2f587d950bf Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:04:31 +0800 +Subject: [PATCH 054/140] Revert JDK-8277372: Add getters for BOT and card + table members + +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 4 ++-- + .../riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 6 +++--- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index 1c46b3947d3..6b75bf63781 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -215,7 +215,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + ExternalAddress cardtable((address) ct->byte_map_base()); + const Register card_addr = tmp; + +- __ srli(card_addr, store_addr, CardTable::card_shift()); ++ __ srli(card_addr, store_addr, CardTable::card_shift); + + // get the address of the card + __ load_byte_map_base(tmp2); +@@ -437,7 +437,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* + assert_different_registers(card_offset, byte_map_base, t0); + + __ load_parameter(0, card_offset); +- __ srli(card_offset, card_offset, CardTable::card_shift()); ++ __ srli(card_offset, card_offset, CardTable::card_shift); + __ load_byte_map_base(byte_map_base); + + // Convert card offset into an address in card_addr +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index a419f92b5f6..868d022ac74 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -41,7 +41,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + +- __ srli(obj, obj, CardTable::card_shift()); ++ __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); + +@@ -74,8 +74,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + __ shadd(end, count, start, count, LogBytesPerHeapOop); + __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + +- __ srli(start, start, CardTable::card_shift()); +- __ srli(end, end, CardTable::card_shift()); ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); + __ sub(count, end, start); // number of bytes to copy + + __ load_byte_map_base(tmp); + +From 6a81a820e6c08cfdd8e29a835e953dabffdca98a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:30:58 +0800 +Subject: [PATCH 055/140] Revert JDK-8260941: Remove the conc_scan parameter + for CardTable + +--- + .../shared/cardTableBarrierSetAssembler_riscv.cpp | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index 868d022ac74..a476e5ec84d 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -41,6 +41,9 @@ void 
CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ + __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); +@@ -56,6 +59,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + __ sb(zr, Address(tmp)); + __ bind(L_already_dirty); + } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } + __ sb(zr, Address(tmp)); + } + } +@@ -66,6 +72,10 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); + ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ + Label L_loop, L_done; + const Register end = count; + +@@ -80,6 +90,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + + __ load_byte_map_base(tmp); + __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } + + __ bind(L_loop); + __ add(tmp, start, count); + +From 24688cb665b16331b491bed2566dc97582a3d73c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:32:54 +0800 +Subject: [PATCH 056/140] Revert JDK-8220301: Remove jbyte use in CardTable + +Note: An assertion in `CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier` is removed. See the jdk11u backport for AArch64: https://mail.openjdk.org/pipermail/jdk-updates-dev/2019-August/001746.html +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 3 +++ + .../cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 1 + + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 3 +-- + 4 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index 6b75bf63781..b6786c6b327 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -196,6 +196,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; +@@ -213,6 +214,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + // storing region crossing non-NULL, is card already dirty? 
+ + ExternalAddress cardtable((address) ct->byte_map_base()); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + + __ srli(card_addr, store_addr, CardTable::card_shift); +@@ -419,6 +421,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index a476e5ec84d..81d47d61d4c 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -43,6 +43,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + __ srli(obj, obj, CardTable::card_shift); + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e557a134b5b..6e4d22db40f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2719,7 +2719,7 @@ void MacroAssembler::get_thread(Register thread) { + } + + void MacroAssembler::load_byte_map_base(Register reg) { +- CardTable::CardValue* byte_map_base = ++ jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + li(reg, (uint64_t)byte_map_base); + } +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c2a0be140e9..ca6a232e1e0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2735,8 +2735,7 @@ operand immByteMapBase() + %{ + // Get base of card map + predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && +- (CardTable::CardValue*)n->get_ptr() == +- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); + match(ConP); + + op_cost(0); + +From 6ee27261d406342a5378d4a404319866a9bae804 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:51:20 +0800 +Subject: [PATCH 057/140] Revert JDK-8230486: + G1BarrierSetAssembler::g1_write_barrier_post unnecessarily pushes/pops + new_val + +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index b6786c6b327..d724876ec3a 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -250,7 +250,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + + __ bind(runtime); + // save the live input values +- RegSet saved = RegSet::of(store_addr); ++ RegSet saved = RegSet::of(store_addr, new_val); + __ push_reg(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ 
pop_reg(saved, sp);
+
+From 57067a358ffc1b54edfb305549bd460b0fca47f0 Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Fri, 21 Apr 2023 12:10:22 +0800
+Subject: [PATCH 058/140] Revert JDK-8242449: AArch64: r27 can be allocated in
+ CompressedOops mode
+
+---
+ src/hotspot/cpu/riscv/riscv.ad | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
+index ca6a232e1e0..e3f976faa0d 100644
+--- a/src/hotspot/cpu/riscv/riscv.ad
++++ b/src/hotspot/cpu/riscv/riscv.ad
+@@ -4846,6 +4846,8 @@ instruct storeN(iRegN src, memory mem)
+ instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+ %{
+ match(Set mem (StoreN mem zero));
++ predicate(Universe::narrow_oop_base() == NULL &&
++ Universe::narrow_klass_base() == NULL);
+
+ ins_cost(STORE_COST);
+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %}
+
+From 0db520768d4d268a9dc641e301df45653c52f6eb Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Sun, 23 Apr 2023 14:59:09 +0800
+Subject: [PATCH 059/140] A fix for interpreter frame verification code,
+ skipping the locals check if there are no locals. See one of the additional
+ commits in JDK-8286301, the RISC-V loom port.
+
+---
+ src/hotspot/cpu/riscv/frame_riscv.cpp | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp
+index d03adc0bff4..13c482b610a 100644
+--- a/src/hotspot/cpu/riscv/frame_riscv.cpp
++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp
+@@ -571,7 +571,16 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+
+ // validate locals
+ address locals = (address) *interpreter_frame_locals_addr();
+- if (locals > thread->stack_base() || locals < (address) fp()) {
++ if (locals > thread->stack_base()) {
++ return false;
++ }
++
++ if (m->max_locals() > 0 && locals < (address) fp()) {
++ // fp in interpreter frame on RISC-V is higher than that on AArch64,
++ // pointing to sender_sp and sender_sp-2 respectively.
++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp,
++ // pointing to sender_sp-1 (with one padding slot).
++ // So we verify the 'locals' pointer only if max_locals > 0.
+ return false; + } + + +From 795da5afe59658b4d89cd8501b4f4ec56471b14c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 11:45:40 +0800 +Subject: [PATCH 060/140] ShenandoahGC adaptations on JDK11 for RISC-V backend + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 4 +- + .../c1/shenandoahBarrierSetC1_riscv.cpp | 2 +- + .../shenandoahBarrierSetAssembler_riscv.cpp | 229 +++++++++--------- + .../shenandoahBarrierSetAssembler_riscv.hpp | 15 +- + .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 88 ------- + src/hotspot/cpu/riscv/riscv.ad | 6 +- + .../templateInterpreterGenerator_riscv.cpp | 15 +- + 7 files changed, 146 insertions(+), 213 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 6a961ee2307..90c4af5d3b0 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1817,10 +1817,12 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { + + + void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { +- if (patch_code != lir_patch_none) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } ++#endif + + assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +index cd568cc723f..d19f5b859ce 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -103,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt + __ xchg(access.resolved_addr(), value_opr, result, tmp); + + if (access.is_oop()) { +- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); + LIR_Opr tmp_opr = gen->new_register(type); + __ move(result, tmp_opr); + result = tmp_opr; +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +index 84e1205bc25..b8534c52e77 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -27,7 +27,7 @@ + #include "gc/shenandoah/shenandoahBarrierSet.hpp" + #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" + #include "gc/shenandoah/shenandoahForwarding.hpp" +-#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" + #include "gc/shenandoah/shenandoahHeapRegion.hpp" + #include "gc/shenandoah/shenandoahRuntime.hpp" + #include "gc/shenandoah/shenandoahThreadLocalData.hpp" +@@ -44,6 +44,8 @@ + + #define __ masm-> + ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; ++ + void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { +@@ -116,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, + Address buffer(thread, 
in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? +- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { +- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); +@@ -225,37 +227,21 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb + __ pop_reg(saved_regs, sp); + } + +-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, +- Register dst, +- Address load_addr, +- DecoratorSet decorators) { ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, ++ Address load_addr) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); + assert_different_registers(load_addr.base(), t0, t1); + +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- bool is_narrow = UseCompressedOops && !is_native; +- +- Label heap_stable, not_cset; ++ Label done; + __ enter(); + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lbu(t1, gc_state); + + // Check for heap stability +- if (is_strong) { +- __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); +- __ beqz(t1, heap_stable); +- } else { +- Label lrb; +- __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); +- __ bnez(t0, lrb); +- __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); +- __ beqz(t0, heap_stable); +- __ bind(lrb); +- } ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); + + // use x11 for load address + Register result_dst = dst; +@@ -270,43 +256,12 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, + __ la(x11, load_addr); + __ mv(x10, dst); + +- // Test for in-cset +- if (is_strong) { +- __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); +- __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); +- __ add(t1, t1, t0); +- __ lbu(t1, Address(t1)); +- __ andi(t0, t1, 1); +- __ beqz(t0, not_cset); +- } ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + +- __ push_call_clobbered_registers(); +- if (is_strong) { +- if (is_narrow) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } +- } else if (is_weak) { +- if (is_narrow) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } +- } else { +- assert(is_phantom, "only remaining strength"); +- assert(!is_narrow, "phantom access cannot be narrow"); +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } +- __ jalr(ra); +- __ mv(t0, x10); +- __ pop_call_clobbered_registers(); +- __ mv(x10, t0); +- __ bind(not_cset); + __ mv(result_dst, x10); + __ pop_reg(saved_regs, sp); + +- __ bind(heap_stable); ++ __ bind(done); + __ 
leave(); + } + +@@ -320,6 +275,15 @@ void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register ds + } + } + ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} ++ + // + // Arguments: + // +@@ -363,7 +327,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, + + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + +- load_reference_barrier(masm, dst, src, decorators); ++ load_reference_barrier(masm, dst, src); + + if (dst != result_dst) { + __ mv(result_dst, dst); +@@ -555,7 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { +- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); +@@ -568,12 +532,6 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + +- DecoratorSet decorators = stub->decorators(); +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = stub->addr()->as_pointer_register(); +@@ -587,30 +545,32 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble + __ mv(res, obj); + } + +- if (is_strong) { +- // Check for object in cset. +- __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); +- __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); +- __ add(tmp2, tmp2, tmp1); +- __ lbu(tmp2, Address(tmp2)); +- __ beqz(tmp2, *stub->continuation(), true /* is_far */); +- } ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); ++ ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); ++ ++ // Decode forwarded object. 
++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); + ++ __ bind(slow_path); + ce->store_parameter(res, 0); + ce->store_parameter(addr, 1); +- +- if (is_strong) { +- if (is_native) { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); +- } else { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); +- } +- } else if (is_weak) { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); +- } else { +- assert(is_phantom, "only remaining strength"); +- __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); +- } ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + + __ j(*stub->continuation()); + } +@@ -664,8 +624,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss + __ epilogue(); + } + +-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) { ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("shenandoah_load_reference_barrier", false); + // arg0 : object to be resolved + +@@ -673,31 +632,10 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s + __ load_parameter(0, x10); + __ load_parameter(1, x11); + +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- if (is_strong) { +- if (is_native) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } else { +- if (UseCompressedOops) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } +- } +- } else if (is_weak) { +- assert(!is_native, "weak must not be called off-heap"); +- if (UseCompressedOops) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { +- assert(is_phantom, "only remaining strength"); +- assert(is_native, "phantom must only be called off-heap"); +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + } + __ jalr(ra); + __ mv(t0, x10); +@@ -710,3 +648,68 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s + #undef __ + + #endif // COMPILER1 ++ ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; ++} ++ ++#define __ cgen->assembler()-> ++ ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. 
++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); ++ ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ push_call_clobbered_registers(); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++} ++ ++#undef __ ++ ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +index a705f497667..5d75035e9d4 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -40,6 +40,8 @@ class StubCodeGenerator; + class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + private: + ++ static address _shenandoah_lrb; ++ + void satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, +@@ -57,17 +59,22 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); +- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++ ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); + + public: + ++ static address shenandoah_lrb(); ++ + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + + #ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); +- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); + #endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, +@@ -81,8 +88,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& 
slowpath); + +- void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); + }; + + #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +index 6c855f23c2a..bab407a8b76 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -176,48 +176,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva + ins_pipe(pipe_slow); + %} + +-instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP_DEF res, TEMP tmp, KILL cr); +- format %{ +- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- true /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP_DEF res, TEMP tmp, KILL cr); +- format %{ +- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- true /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- + instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); +@@ -237,49 +195,3 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva + + ins_pipe(pipe_slow); + %} +- +-instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP tmp, KILL cr); +- format %{ +- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" +- "mv $res, EQ\t# $res <-- (EQ ? 
1 : 0)" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. +- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- false /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP tmp, KILL cr); +- format %{ +- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" +- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. +- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- false /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index e3f976faa0d..a6061de7a33 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -828,8 +828,10 @@ bool is_CAS(int opcode, bool maybe_volatile) + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC + case Op_ShenandoahCompareAndSwapP: + case Op_ShenandoahCompareAndSwapN: ++#endif + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: +@@ -851,10 +853,6 @@ bool is_CAS(int opcode, bool maybe_volatile) + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: +- case Op_ShenandoahWeakCompareAndSwapP: +- case Op_ShenandoahWeakCompareAndSwapN: +- case Op_ShenandoahCompareAndExchangeP: +- case Op_ShenandoahCompareAndExchangeN: + return maybe_volatile; + default: + return false; +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index a07dea35b73..5a87c687cf7 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -765,9 +765,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* +- __ load_mirror(t2, xmethod); +- __ sd(zr, Address(sp, 5 * wordSize)); +- __ sd(t2, Address(sp, 4 * wordSize)); ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); ++ } + + __ ld(xcpool, Address(xmethod, Method::const_offset())); + __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); + +From d8b14fd5e6455b47cfcb02d13c0c24c74e824570 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 14:42:07 +0800 +Subject: [PATCH 061/140] Revert JDK-8248404: AArch64: Remove uses of long and + unsigned long + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 19 +++++++++++++------ + .../cpu/riscv/macroAssembler_riscv.cpp | 6 ------ + .../cpu/riscv/macroAssembler_riscv.hpp | 13 ++++++++----- + 3 files changed, 21 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 51aa052a0c7..31aeeb9b425 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -183,13 +183,20 @@ class Address { + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } +- +- template::value)> +- Address(Register r, T o) +- : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} +- ++ Address(Register r, int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, ByteSize disp) +- : Address(r, in_bytes(disp)) {} ++ : Address(r, in_bytes(disp)) { } + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 6e4d22db40f..b95f69cfcda 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1364,12 +1364,6 @@ void MacroAssembler::mv(Register Rd, Address dest) { + movptr(Rd, dest.target()); + } + +-void MacroAssembler::mv(Register Rd, address addr) { +- // Here in case of use with relocation, use fix length instruciton +- // movptr instead of li +- movptr(Rd, addr); +-} +- + void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c1ffa120774..76b2716659b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -543,15 +543,18 @@ class MacroAssembler: public Assembler { + } + + // mv +- template::value)> +- inline void mv(Register Rd, T o) { +- li(Rd, (int64_t)o); +- } ++ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } ++ ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); +- void mv(Register Rd, address addr); + void mv(Register Rd, RegisterOrConstant src); + + // logic + +From 94c1c9c01e61d0cb7c32596ef19b347c32406546 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 16:54:36 +0800 +Subject: [PATCH 062/140] Revert JDK-8280503: Use allStatic.hpp instead of + allocation.hpp where possible + +--- + src/hotspot/cpu/riscv/bytes_riscv.hpp | 2 -- + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 1 - + 2 files changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp +index 23d982f9abd..f60e0e38ae8 100644 +--- a/src/hotspot/cpu/riscv/bytes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp +@@ -27,8 +27,6 @@ + #ifndef CPU_RISCV_BYTES_RISCV_HPP + #define CPU_RISCV_BYTES_RISCV_HPP + +-#include "memory/allStatic.hpp" +- + class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +index 83ffcc55d83..bc4e5758256 100644 +--- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -27,7 +27,6 @@ + #define CPU_RISCV_JNITYPES_RISCV_HPP + + #include "jni.h" +-#include "memory/allStatic.hpp" + #include "oops/oop.hpp" + + // This file holds platform-dependent routines used to write primitive jni + +From 49e6399009b51edafa6904164528e1d051aeae6c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:07:31 +0800 +Subject: [PATCH 063/140] Revert JDK-8276453: Undefined behavior in C1 + LIR_OprDesc causes SEGV in fastdebug build + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 1 + + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index af7bd067f33..6057d43296b 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -58,7 +58,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) + } + + RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) +- : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); + } +@@ -83,7 +83,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { + if (_throw_index_out_of_bounds_exception) { + stub_id = 
Runtime1::throw_index_exception_id; + } else { +- assert(_array != LIR_Opr::nullOpr(), "sanity"); ++ assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +index 172031941b2..1f8b2b55100 100644 +--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -156,8 +156,8 @@ LIR_Opr FrameMap::long11_opr; + LIR_Opr FrameMap::fpu10_float_opr; + LIR_Opr FrameMap::fpu10_double_opr; + +-LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +-LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + + //-------------------------------------------------------- + // FrameMap +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index c45a75b2301..227e7664225 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -206,6 +206,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + break; + default: + ShouldNotReachHere(); ++ r = NULL; + } + return r; + } + +From b94bda9d1a2c12fa379f8fe813460c498344f543 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:19:19 +0800 +Subject: [PATCH 064/140] Revert JDK-8256205: Simplify compiler calling + convention handling + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 25 +++++++++++++++++++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 11 ++------ + 4 files changed, 29 insertions(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index 6057d43296b..12980c12de6 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -290,7 +290,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; +- SharedRuntime::java_calling_convention(signature, args, args_num); ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); + + // push parameters + Register r[args_num]; +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +index 1f8b2b55100..682ebe82627 100644 +--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -314,7 +314,7 @@ void FrameMap::initialize() { + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; +- SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a6061de7a33..1667994699f 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2443,6 +2443,12 @@ frame %{ + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. 
The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); +@@ -2461,6 +2467,25 @@ frame %{ + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} ++ + // Location of compiled Java return values. Same as C for now. + return_value + %{ +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index eaefcc2b595..411bddd2ace 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -233,7 +233,8 @@ static int reg2offset_out(VMReg r) { + + int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, +- int total_args_passed) { ++ int total_args_passed, ++ int is_outgoing) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { +@@ -2155,14 +2156,6 @@ void SharedRuntime::generate_deopt_blob() { + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); + } + +-// Number of stack slots between incoming argument block and the start of +-// a new frame. The PROLOG must add this many slots to the stack. The +-// EPILOG must remove this many slots. +-// RISCV needs two words for RA (return address) and FP (frame pointer). 
+-uint SharedRuntime::in_preserve_stack_slots() { +- return 2 * VMRegImpl::slots_per_word; +-} +- + uint SharedRuntime::out_preserve_stack_slots() { + return 0; + } + +From 3fc948472c4a0918b967646b45c8886103b839d2 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:27:57 +0800 +Subject: [PATCH 065/140] Revert JDK-8183574: Unify the is_power_of_2 functions + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 3 +-- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1 - + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 1 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1 - + src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 1 - + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 10 files changed, 3 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +index 4c1c13dc290..65d0eda62ef 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -190,7 +190,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: +- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); +@@ -208,7 +208,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + } + break; + case lir_rem: +- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 90c4af5d3b0..9de89a3b026 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -41,7 +41,6 @@ + #include "oops/objArrayKlass.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/sharedRuntime.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + #ifndef PRODUCT +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 227e7664225..a9345158749 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -38,7 +38,6 @@ + #include "ci/ciTypeArrayKlass.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + #ifdef ASSERT +@@ -383,7 +382,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant +- if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index 1f45fba9de0..fc88d5c180e 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -46,7 +46,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/vframe.hpp" + #include "runtime/vframeArray.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index f0c249f0d26..2fc0b00e2cb 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -44,7 +44,6 @@ + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/thread.inline.hpp" +-#include "utilities/powerOfTwo.hpp" + + void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index b95f69cfcda..41a415ef2cf 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -47,7 +47,6 @@ + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.hpp" +-#include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" + #include "opto/node.hpp" +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 76b2716659b..dd39f67d507 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -30,7 +30,6 @@ + #include "asm/assembler.hpp" + #include "metaprogramming/enableIf.hpp" + #include "oops/compressedOops.hpp" +-#include "utilities/powerOfTwo.hpp" + + // MacroAssembler extends Assembler by frequently used macros. 
+ // +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 8392b768847..0c5b0e001ee 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -45,7 +45,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.inline.hpp" + #include "utilities/align.hpp" +-#include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/runtime.hpp" + #endif +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 5a87c687cf7..a10677bf650 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -51,7 +51,6 @@ + #include "runtime/timer.hpp" + #include "runtime/vframeArray.hpp" + #include "utilities/debug.hpp" +-#include "utilities/powerOfTwo.hpp" + #include + + #ifndef PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 158294f7436..2a92fb9dd49 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -44,7 +44,6 @@ + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" + #include "runtime/synchronizer.hpp" +-#include "utilities/powerOfTwo.hpp" + + #define __ _masm-> + + +From 31b18aa6a29b83e2cae7ea76c5d4759b2596eca0 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:34:39 +0800 +Subject: [PATCH 066/140] Revert JDK-8276976: Rename LIR_OprDesc to LIR_Opr + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 14 +++++++------- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 9de89a3b026..70ee6295bfb 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1261,7 +1261,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); +- assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); +diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +index 5f1c394ab3d..0317ed9003e 100644 +--- a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +@@ -27,22 +27,22 @@ + #include "asm/register.hpp" + #include "c1/c1_LIR.hpp" + +-FloatRegister LIR_Opr::as_float_reg() const { ++FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); + } + +-FloatRegister LIR_Opr::as_double_reg() const { ++FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); + } + + // Reg2 unused. 
+ LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); +- return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | +- (reg1 << LIR_Opr::reg2_shift) | +- LIR_Opr::double_type | +- LIR_Opr::fpu_register | +- LIR_Opr::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); + } + + #ifndef PRODUCT + +From 2e64fa47eddc271d32b136ace4f062cfb9648b25 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:39:16 +0800 +Subject: [PATCH 067/140] Revert JDK-8269672: C1: Remove unaligned move on all + architectures + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +++++--- + .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- + 2 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 70ee6295bfb..e29c0df5f8b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -673,7 +673,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po + } + } + +-void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; +@@ -795,7 +795,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + reg2stack(temp, dest, dest->type(), false); + } + +-void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + +@@ -910,11 +910,13 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ bind(done); + } +@@ -1866,7 +1868,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg + + void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { +- move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index d724876ec3a..bc847388f68 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ 
b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -340,7 +340,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { +- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + +From 5f15abe61c700cbf59805530c52e8e558354d552 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:54:05 +0800 +Subject: [PATCH 068/140] Revert JDK-8264805: Remove the experimental + Ahead-of-Time Compiler + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp | 1 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +index 051328c3a8a..5c81f1c704c 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +@@ -73,6 +73,7 @@ friend class ArrayCopyStub; + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 1cfc92b28fa..a29e5be9dbb 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { + } + + void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { +- address stub = find_stub(); ++ address stub = find_stub(false /* is_aot */); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { +@@ -138,7 +138,7 @@ void CompiledDirectStaticCall::verify() { + _call->verify_alignment(); + + // Verify stub. +- address stub = find_stub(); ++ address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder + +From 4cfd20c7d163188a1a4e63ffaa19708e15be9d96 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:59:45 +0800 +Subject: [PATCH 069/140] Revert JDK-8277417: C1 LIR instruction for load-klass + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 30 ++++++++----------- + 1 file changed, 12 insertions(+), 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index e29c0df5f8b..49653d04d81 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -840,7 +840,14 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: +- __ ld(dest->as_register(), as_Address(from_addr)); ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); +@@ -869,6 +876,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + __ decode_heap_oop(dest->as_register()); + } + __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } + } + } + +@@ -1531,23 +1542,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + __ bind(*op->stub()->continuation()); + } + +-void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { +- Register obj = op->obj()->as_pointer_register(); +- Register result = op->result_opr()->as_pointer_register(); +- +- CodeEmitInfo* info = op->info(); +- if (info != NULL) { +- add_debug_info_for_null_check_here(info); +- } +- +- if (UseCompressedClassPointers) { +- __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); +- __ decode_klass_not_null(result); +- } else { +- __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); +- } +-} +- + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + +From eb4de6fc8f9b6192d16343382ebbe4035ce71702 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:09:31 +0800 +Subject: [PATCH 070/140] Revert JDK-8245957: Remove unused LIR_OpBranch::type + after SPARC port removal + +--- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index a9345158749..2aba4f4974f 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -393,7 +393,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); +- __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); +@@ -467,7 +467,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + if 
(need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); +- __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; +@@ -1055,9 +1055,9 @@ void LIRGenerator::do_If(If* x) { + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { +- __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { +- __ branch(lir_cond(cond), x->tsux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); + +From d34f25c618982d3ac79e6ab2a47b3a199434d01b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:14:10 +0800 +Subject: [PATCH 071/140] Revert JDK-8266950: Remove vestigial support for + non-strict floating-point execution + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++++ + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 ++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +index 65d0eda62ef..2a99d49c94b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -238,7 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); +@@ -251,7 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 2aba4f4974f..21ae066e9ab 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -360,7 +360,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + right.load_item(); + + LIR_Opr reg = rlock(x); +- arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); + } + +From 
02c0a84d52417d4aeddbdd10c07df446ee45c5de Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:19:51 +0800 +Subject: [PATCH 072/140] Revert JDK-8276217: Harmonize StrictMath intrinsics + handling + +--- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 21ae066e9ab..f9242251491 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -651,16 +651,14 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through +- case vmIntrinsics::_dsqrt: // fall through +- case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { +- case vmIntrinsics::_dsqrt: // fall through +- case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + +From 8dbace163d42cbb41ff49463b34f8971437fe82f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:35:08 +0800 +Subject: [PATCH 073/140] Revert JDK-8276209: Some call sites doesn't pass the + parameter 'size' to SharedRuntime::dtrace_object_alloc(_base) + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index fc88d5c180e..329df2e1ca7 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -1186,7 +1186,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 2a92fb9dd49..ddc9498dddc 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3577,7 +3577,7 @@ void TemplateTable::_new() { + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); + __ pop(atos); // restore the return value + } + __ j(done); + +From 8930b6049a5b6e31ec9409c167b0e58d24cf6821 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:38:51 +0800 +Subject: [PATCH 074/140] Revert JDK-8229838: Rename markOop files to markWord + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/frame_riscv.cpp | 1 - + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + 3 files changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index e486f41948e..44ceccd8bd1 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -33,7 +33,6 @@ + #include "gc/shared/collectedHeap.hpp" + #include "interpreter/interpreter.hpp" + #include "oops/arrayOop.hpp" +-#include "oops/markWord.hpp" + #include "runtime/basicLock.hpp" + #include "runtime/biasedLocking.hpp" + #include "runtime/os.hpp" +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 13c482b610a..050595389e9 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -29,7 +29,6 @@ + #include "interpreter/interpreter.hpp" + #include "memory/resourceArea.hpp" + #include "memory/universe.hpp" +-#include "oops/markWord.hpp" + #include "oops/method.hpp" + #include "oops/oop.inline.hpp" + #include "prims/methodHandles.hpp" +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 2fc0b00e2cb..006fe49b155 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -33,7 +33,6 @@ + #include "interpreter/interpreterRuntime.hpp" + #include "logging/log.hpp" + #include "oops/arrayOop.hpp" +-#include "oops/markWord.hpp" + #include "oops/method.hpp" + #include "oops/methodData.hpp" + #include "prims/jvmtiExport.hpp" + +From f11c5a2beca94c8248c30899fef90947d478e10c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:42:33 +0800 +Subject: [PATCH 075/140] Revert JDK-8235673: [C1, C2] Split inlining control + flags + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index fe46f7b21c8..fd25f8f9afd 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -42,6 +42,7 @@ define_pd_global(bool, TieredCompilation, false); + define_pd_global(intx, CompileThreshold, 1500 ); + + define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); + define_pd_global(intx, NewSizeThreadIncrease, 4*K ); + define_pd_global(intx, InitialCodeCacheSize, 160*K); + define_pd_global(intx, ReservedCodeCacheSize, 32*M ); + +From 6908dc58f2c66ca6a5adf4444a7ec2a91a80b9c8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:45:00 +0800 +Subject: [PATCH 076/140] Revert JDK-8262074: Consolidate the default value of + MetaspaceSize + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index fd25f8f9afd..1c55a23eecf 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -53,6 +53,7 @@ define_pd_global(bool, ProfileInterpreter, false); + define_pd_global(intx, CodeCacheExpansionSize, 32*K ); + define_pd_global(uintx, CodeCacheMinBlockLength, 1); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); + define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 53a41665f4b..d9e5fcc1bb0 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -75,6 +75,9 @@ define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); + define_pd_global(uintx, CodeCacheMinBlockLength, 6); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ + // Ergonomics related flags + define_pd_global(bool, NeverActAsServerClassMachine, false); + + +From a3e991b37781d90c822471b54ace915622bee0da Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:48:15 +0800 +Subject: [PATCH 077/140] Revert JDK-8246023: Obsolete LIRFillDelaySlot + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 1c55a23eecf..bd8d039de03 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -60,6 +60,7 @@ define_pd_global(bool, CICompileOSR, true ); + #endif // !COMPILER2 + define_pd_global(bool, UseTypeProfile, false); + ++define_pd_global(bool, LIRFillDelaySlots, false); + define_pd_global(bool, OptimizeSinglePrecision, true ); + define_pd_global(bool, CSEArrayLength, false); + define_pd_global(bool, TwoOperandLIRForm, false); + +From 9f6082ae9810e6a26c6803cb37cce62297d15a74 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:50:27 +0800 +Subject: [PATCH 078/140] Revert JDK-8136414: Large performance penalty + declaring a method strictfp on strict-only platforms + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index bd8d039de03..16a87b7aced 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -59,6 +59,7 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); + #endif // !COMPILER2 + define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); + + define_pd_global(bool, LIRFillDelaySlots, false); + define_pd_global(bool, OptimizeSinglePrecision, true ); + +From fbf03fc61be068f7f7c8ca1ab3854cc05519c5a3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:58:36 +0800 +Subject: [PATCH 079/140] Revert JDK-8251462: Simplify compilation policy + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 +- + .../templateInterpreterGenerator_riscv.cpp | 114 +++++++++--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 176 ++++++++++++------ + 5 files changed, 210 insertions(+), 88 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 16a87b7aced..8f2f4e0e81d 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -32,7 +32,7 @@ + // Sets the default values for platform dependent flags used by the client compiler. 
+ // (see c1_globals.hpp) + +-#ifndef COMPILER2 ++#ifndef TIERED + define_pd_global(bool, BackgroundCompilation, true ); + define_pd_global(bool, InlineIntrinsics, true ); + define_pd_global(bool, PreferInterpreterNativeStubs, false); +@@ -57,7 +57,7 @@ define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); + define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); +-#endif // !COMPILER2 ++#endif // !TIERED + define_pd_global(bool, UseTypeProfile, false); + define_pd_global(bool, RoundFPResults, true ); + +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index d9e5fcc1bb0..6c301cdae04 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); + define_pd_global(bool, ProfileTraps, true); + define_pd_global(bool, UseOnStackReplacement, true); + define_pd_global(bool, ProfileInterpreter, true); +-define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); ++define_pd_global(bool, TieredCompilation, trueInTiered); + define_pd_global(intx, CompileThreshold, 10000); + + define_pd_global(intx, OnStackReplacePercentage, 140); +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 50bbb6a77b8..b78f258a764 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im + define_pd_global(bool, TrapBasedNullChecks, false); + define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +-define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. + define_pd_global(intx, CodeEntryAlignment, 64); + define_pd_global(intx, OptoLoopAlignment, 16); + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index a10677bf650..8aea4eca048 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -556,31 +556,81 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, + // + // xmethod: method + // +-void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. +- int increment = InvocationCounter::count_increment; +- Label no_mdo; +- if (ProfileInterpreter) { +- // Are we profiling? 
+- __ ld(x10, Address(xmethod, Method::method_data_offset())); +- __ beqz(x10, no_mdo); +- // Increment counter in the MDO +- const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + +- in_bytes(InvocationCounter::counter_offset())); +- const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); +- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); +- __ j(done); ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(xmethod, t1, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); ++ ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters ++ ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } ++ ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); ++ } ++ __ bind(done); + } +- __ bind(no_mdo); +- // Increment counter in MethodCounters +- const Address invocation_counter(t1, +- MethodCounters::invocation_counter_offset() + +- InvocationCounter::counter_offset()); +- __ get_method_counters(xmethod, t1, done); +- const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); +- __ 
increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); +- __ bind(done); + } + + void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { +@@ -977,7 +1027,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { +- generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; +@@ -1389,8 +1439,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // increment invocation count & check for overflow + Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; + if (inc_counter) { +- generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } + } + + Label continue_after_compile; +@@ -1427,6 +1484,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // invocation counter overflow + if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ j(profile_method_continue); ++ } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index ddc9498dddc..bb20f228447 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -1745,6 +1745,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; ++ Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches +@@ -1769,31 +1770,75 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + +- Label no_mdo; +- int increment = InvocationCounter::count_increment; +- if (ProfileInterpreter) { +- // Are we profiling? +- __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); +- __ beqz(x11, no_mdo); +- // Increment the MDO backedge counter +- const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + +- in_bytes(InvocationCounter::counter_offset())); +- const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); +- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, +- x10, t0, false, ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, + UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); +- __ j(dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter ++ ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } + } +- __ bind(no_mdo); +- // Increment backedge counter in MethodCounters* +- __ ld(t0, Address(xmethod, Method::method_counters_offset())); +- const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); +- __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, +- x10, t1, false, +- UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } +- + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + +@@ -1802,52 +1847,63 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + +- if (UseLoopCounter && UseOnStackReplacement) { +- // invocation counter overflow +- __ bind(backedge_counter_overflow); +- __ neg(x12, x12); +- __ add(x12, x12, xbcp); // branch xbcp +- // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) +- __ call_VM(noreg, +- CAST_FROM_FN_PTR(address, +- InterpreterRuntime::frequency_counter_overflow), +- x12); +- __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode +- +- // x10: osr nmethod (osr ok) or NULL (osr not possible) +- // w11: target bytecode +- // x12: temporary +- __ beqz(x10, dispatch); // test result -- no osr if null +- // nmethod may have been invalidated (VM may block upon call_VM return) +- __ lbu(x12, Address(x10, nmethod::state_offset())); +- if (nmethod::in_use != 0) { +- __ sub(x12, x12, nmethod::in_use); ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); + } +- __ bnez(x12, dispatch); + +- // We have the address of an on stack replacement routine in x10 +- // We need to prepare to execute the OSR method. First we must +- // migrate the locals and monitors off of the stack. ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); ++ ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
+ +- __ mv(x9, x10); // save the nmethod ++ __ mv(x9, x10); // save the nmethod + +- call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + +- // x10 is OSR buffer, move it to expected parameter location +- __ mv(j_rarg0, x10); ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + +- // remove activation +- // get sender esp +- __ ld(esp, +- Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); +- // remove frame anchor +- __ leave(); +- // Ensure compiled code always sees stack at proper alignment +- __ andi(sp, esp, -16); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + +- // and begin the OSR nmethod +- __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); +- __ jr(t0); ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } + } + } + + +From b1f3fd0510681324d70028443a3532d6084be504 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 27 Apr 2023 11:37:05 +0800 +Subject: [PATCH 080/140] Revert JDK-8250902: Implement MD5 Intrinsics on x86 + +--- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ---- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 30 +++++++++---------- + 2 files changed, 15 insertions(+), 20 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index c0491d23fa6..d4b79162d84 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -97,11 +97,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + +- if (UseMD5Intrinsics) { +- warning("MD5 intrinsics are not available on this CPU."); +- FLAG_SET_DEFAULT(UseMD5Intrinsics, false); +- } +- + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +index 2ecfec07a4c..8566d57c391 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -24,7 +24,7 @@ + + package compiler.intrinsics.sha.cli.testcases; + +-import compiler.intrinsics.sha.cli.DigestOptionsBase; ++import compiler.intrinsics.sha.cli.SHAOptionsBase; + import jdk.test.lib.process.ExitCode; + import jdk.test.lib.Platform; + import jdk.test.lib.cli.CommandLineOptionTest; +@@ -36,7 +36,7 @@ + * which don't support instruction required by the tested option. 
+ */ + public class GenericTestCaseForUnsupportedRISCV64CPU extends +- DigestOptionsBase.TestCase { ++ SHAOptionsBase.TestCase { + + final private boolean checkUseSHA; + +@@ -46,7 +46,7 @@ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, +- new NotPredicate(DigestOptionsBase.getPredicateForOption( ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); + + this.checkUseSHA = checkUseSHA; +@@ -58,27 +58,27 @@ protected void verifyWarnings() throws Throwable { + + "option '-XX:-%s' without any warnings", optionName); + //Verify that option could be disabled without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { +- DigestOptionsBase.getWarningForUnsupportedCPU(optionName) ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, shouldPassMessage, shouldPassMessage, ExitCode.OK, +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + if (checkUseSHA) { + shouldPassMessage = String.format("If JVM is started with '-XX:-" + + "%s' '-XX:+%s', output should contain warning.", +- DigestOptionsBase.USE_SHA_OPTION, optionName); ++ SHAOptionsBase.USE_SHA_OPTION, optionName); + + // Verify that when the tested option is enabled, then + // a warning will occur in VM output if UseSHA is disabled. +- if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { + CommandLineOptionTest.verifySameJVMStartup( +- new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, + null, + shouldPassMessage, + shouldPassMessage, + ExitCode.OK, +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, +- CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } + } +@@ -90,7 +90,7 @@ protected void verifyOptionValues() throws Throwable { + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be disabled by default", + optionName), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + + if (checkUseSHA) { + // Verify that option is disabled even if it was explicitly enabled +@@ -98,7 +98,7 @@ protected void verifyOptionValues() throws Throwable { + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if set to true directly", optionName), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option is disabled when +UseSHA was passed to JVM. 
+@@ -106,10 +106,10 @@ protected void verifyOptionValues() throws Throwable { + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if %s flag set to JVM", + optionName, CommandLineOptionTest.prepareBooleanFlag( +- DigestOptionsBase.USE_SHA_OPTION, true)), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag( +- DigestOptionsBase.USE_SHA_OPTION, true)); ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } + } + } + +From b5e96cb7663b2def3a064b9aede7209fb0c5eeda Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 27 Apr 2023 15:41:48 +0800 +Subject: [PATCH 081/140] Revert JDK-8253555: Make ByteSize and WordSize typed + scoped enums + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 31aeeb9b425..9959ac1d02c 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -195,8 +195,10 @@ class Address { + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long long o) + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++#ifdef ASSERT + Address(Register r, ByteSize disp) +- : Address(r, in_bytes(disp)) { } ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } ++#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), + +From 592afab705a4d4c8b2773a0808e47efc2a14517d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:18:12 +0800 +Subject: [PATCH 082/140] Revert JDK-8253457: Remove unimplemented register + stack functions + +--- + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +index 61e2cf85b63..313a7b932c3 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -34,15 +34,31 @@ + frame pd_last_frame(); + + public: ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); + private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). 
++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} + + #endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP + +From 28238cf776bd25c9805d9dd686c08fe8d3a1500b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:22:30 +0800 +Subject: [PATCH 083/140] Revert JDK-8253539: Remove unused JavaThread + functions for set_last_Java_fp/pc + +--- + src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp | 3 +++ + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +index 9a6084afa1d..5a0c9b812fc 100644 +--- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -83,4 +83,7 @@ + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } ++ + #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +index 313a7b932c3..4b91fa855ae 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -34,6 +34,9 @@ + frame pd_last_frame(); + + public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + +From f9322bb6235b603eac825c6e6751093ada1e6cfe Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:45:56 +0800 +Subject: [PATCH 084/140] Revert JDK-8269853: Prefetch::read should accept + pointer to const + +--- + src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +index a6432c84ec7..2bd48e09c34 100644 +--- a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -29,7 +29,7 @@ + #include "runtime/prefetch.hpp" + + +-inline void Prefetch::read (const void *loc, intx interval) { ++inline void Prefetch::read (void *loc, intx interval) { + } + + inline void Prefetch::write(void *loc, intx interval) { + +From aa6f7320d8d849b8e47b6e77a20257e3d99fd14f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 16:14:55 +0800 +Subject: [PATCH 085/140] Revert: JDK-8254231: Implementation of Foreign Linker + API (Incubator) JDK-8264774: Implementation of Foreign Function and Memory + API (Incubator) + +--- + .../cpu/riscv/foreign_globals_riscv.cpp | 44 ------------------- + .../cpu/riscv/foreign_globals_riscv.hpp | 32 -------------- + src/hotspot/cpu/riscv/frame_riscv.cpp | 15 ------- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 12 +---- + src/hotspot/cpu/riscv/riscv.ad | 5 --- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 +---- + .../riscv/universalNativeInvoker_riscv.cpp | 33 -------------- + .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 ------------------ + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 5 --- + 9 files changed, 2 insertions(+), 196 deletions(-) + delete mode 
100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +deleted file mode 100644 +index 5c700be9c91..00000000000 +--- a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp ++++ /dev/null +@@ -1,44 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/foreign_globals.hpp" +-#include "utilities/debug.hpp" +- +-// Stubbed out, implement later +-const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { +- Unimplemented(); +- return {}; +-} +- +-const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { +- Unimplemented(); +- return {}; +-} +- +-const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { +- ShouldNotCallThis(); +- return {}; +-} +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +deleted file mode 100644 +index 3ac89752c27..00000000000 +--- a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp ++++ /dev/null +@@ -1,32 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +-#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +- +-class ABIDescriptor {}; +-class BufferLayout {}; +- +-#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 050595389e9..40ec584b994 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -361,21 +361,6 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { + return fr; + } + +-OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { +- ShouldNotCallThis(); +- return nullptr; +-} +- +-bool frame::optimized_entry_frame_is_first() const { +- ShouldNotCallThis(); +- return false; +-} +- +-frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { +- ShouldNotCallThis(); +- return {}; +-} +- + //------------------------------------------------------------------------------ + // frame::verify_deopt_original_pc + // +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 1f7c0c87c21..3bf5cfb16c3 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -181,13 +181,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* + return NULL; + } + +- // No need in interpreter entry for linkToNative for now. +- // Interpreter calls compiled entry through i2c. +- if (iid == vmIntrinsics::_linkToNative) { +- __ ebreak(); +- return NULL; +- } +- + // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) +@@ -280,10 +273,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + +- if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { +- if (iid == vmIntrinsics::_linkToNative) { +- assert(for_compiler_entry, "only compiler entry is supported"); +- } ++ if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); + } else { +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 1667994699f..7ec76e72ff0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -945,11 +945,6 @@ int MachCallRuntimeNode::ret_addr_offset() { + } + } + +-int MachCallNativeNode::ret_addr_offset() { +- Unimplemented(); +- return -1; +-} +- + // + // Compute padding required for nodes which need alignment + // +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 411bddd2ace..897dafcc99c 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1037,7 +1037,7 @@ static void gen_special_dispatch(MacroAssembler* masm, + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = x9; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); +- } else if (iid == 
vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); +@@ -2566,14 +2566,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha + } + + #ifdef COMPILER2 +-RuntimeStub* SharedRuntime::make_native_invoker(address call_target, +- int shadow_space_bytes, +- const GrowableArray& input_registers, +- const GrowableArray& output_registers) { +- Unimplemented(); +- return nullptr; +-} +- + //------------------------------generate_exception_blob--------------------------- + // creates exception blob at the end + // Using exception blob, this code is jumped from a compiled method. +diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +deleted file mode 100644 +index 4f50adb05c3..00000000000 +--- a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp ++++ /dev/null +@@ -1,33 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/universalNativeInvoker.hpp" +-#include "utilities/debug.hpp" +- +-address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { +- Unimplemented(); +- return nullptr; +-} +diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp +deleted file mode 100644 +index ce70da72f2e..00000000000 +--- a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp ++++ /dev/null +@@ -1,42 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). 
+- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/universalUpcallHandler.hpp" +-#include "utilities/debug.hpp" +- +-address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { +- Unimplemented(); +- return nullptr; +-} +- +-address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { +- ShouldNotCallThis(); +- return nullptr; +-} +- +-bool ProgrammableUpcallHandler::supports_optimized_upcalls() { +- return false; +-} +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index 1f6eff96cba..5d1187c2a27 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -49,8 +49,3 @@ void VMRegImpl::set_regName() { + regName[i] = "NON-GPR-FPR"; + } + } +- +-VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { +- Unimplemented(); +- return VMRegImpl::Bad(); +-} + +From a5889735a97f3712bb649c454dee192d75457f96 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 17:35:20 +0800 +Subject: [PATCH 086/140] Revert JDK-8256254: Convert vmIntrinsics::ID to enum + class + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 006fe49b155..1133e80a210 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -1841,7 +1841,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); +- li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); ++ li(t1, vmIntrinsics::_compiledLambdaForm); + bne(t0, t1, profile_continue); + bind(do_profile); + } +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 3bf5cfb16c3..4442b5991b1 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -411,7 +411,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + } + + default: +- fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 897dafcc99c..5b934b04e8e 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1040,7 +1040,7 @@ static void gen_special_dispatch(MacroAssembler* masm, + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { +- fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ fatal("unexpected intrinsic id %d", iid); + } + + if (member_reg != noreg) { + +From 
245d01e2cae27e41b875450f5f92751e4f36a095 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 20:27:58 +0800 +Subject: [PATCH 087/140] Revert JDK-8216557: Aarch64: Add support for + Concurrent Class Unloading + +--- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 - + .../gc/shared/barrierSetAssembler_riscv.cpp | 71 -------- + .../gc/shared/barrierSetAssembler_riscv.hpp | 3 - + .../gc/shared/barrierSetNMethod_riscv.cpp | 171 ------------------ + .../cpu/riscv/macroAssembler_riscv.cpp | 35 +--- + .../cpu/riscv/macroAssembler_riscv.hpp | 2 - + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 1 - + src/hotspot/cpu/riscv/riscv.ad | 16 -- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 7 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 49 ----- + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 1 - + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 6 - + 12 files changed, 5 insertions(+), 361 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 44ceccd8bd1..a6d1b1470f9 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -322,10 +322,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); +- +- // Insert nmethod entry barrier into frame. +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->nmethod_entry_barrier(this); + } + + void C1_MacroAssembler::remove_frame(int framesize) { +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +index 3c115a2ea02..2b556b95d71 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -27,7 +27,6 @@ + #include "classfile/classLoaderData.hpp" + #include "gc/shared/barrierSet.hpp" + #include "gc/shared/barrierSetAssembler.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" + #include "gc/shared/collectedHeap.hpp" + #include "interpreter/interp_masm.hpp" + #include "memory/universe.hpp" +@@ -230,73 +229,3 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + } + __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); + } +- +-void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { +- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); +- +- if (bs_nm == NULL) { +- return; +- } +- +- // RISCV atomic operations require that the memory address be naturally aligned. +- __ align(4); +- +- Label skip, guard; +- Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); +- +- __ lwu(t0, guard); +- +- // Subsequent loads of oops must occur after load of guard value. +- // BarrierSetNMethod::disarm sets guard with release semantics. +- __ membar(MacroAssembler::LoadLoad); +- __ lwu(t1, thread_disarmed_addr); +- __ beq(t0, t1, skip); +- +- int32_t offset = 0; +- __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); +- __ jalr(ra, t0, offset); +- __ j(skip); +- +- __ bind(guard); +- +- assert(__ offset() % 4 == 0, "bad alignment"); +- __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
+- +- __ bind(skip); +-} +- +-void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { +- BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); +- if (bs == NULL) { +- return; +- } +- +- Label bad_call; +- __ beqz(xmethod, bad_call); +- +- // Pointer chase to the method holder to find out if the method is concurrently unloading. +- Label method_live; +- __ load_method_holder_cld(t0, xmethod); +- +- // Is it a strong CLD? +- __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); +- __ bnez(t1, method_live); +- +- // Is it a weak but alive CLD? +- __ push_reg(RegSet::of(x28, x29), sp); +- +- __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); +- +- // Uses x28 & x29, so we must pass new temporaries. +- __ resolve_weak_handle(x28, x29); +- __ mv(t0, x28); +- +- __ pop_reg(RegSet::of(x28, x29), sp); +- +- __ bnez(t0, method_live); +- +- __ bind(bad_call); +- +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(method_live); +-} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +index b85f7f5582b..984d94f4c3d 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -28,7 +28,6 @@ + + #include "asm/macroAssembler.hpp" + #include "gc/shared/barrierSet.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" + #include "memory/allocation.hpp" + #include "oops/access.hpp" + +@@ -71,8 +70,6 @@ class BarrierSetAssembler: public CHeapObj { + ); + virtual void barrier_stubs_init() {} + +- virtual void nmethod_entry_barrier(MacroAssembler* masm); +- virtual void c2i_entry_barrier(MacroAssembler* masm); + virtual ~BarrierSetAssembler() {} + }; + +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +deleted file mode 100644 +index ae7ee4c5a44..00000000000 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp ++++ /dev/null +@@ -1,171 +0,0 @@ +-/* +- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "code/codeCache.hpp" +-#include "code/nativeInst.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" +-#include "logging/log.hpp" +-#include "memory/resourceArea.hpp" +-#include "runtime/sharedRuntime.hpp" +-#include "runtime/registerMap.hpp" +-#include "runtime/thread.hpp" +-#include "utilities/align.hpp" +-#include "utilities/debug.hpp" +- +-class NativeNMethodBarrier: public NativeInstruction { +- address instruction_address() const { return addr_at(0); } +- +- int *guard_addr() { +- /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ +- return reinterpret_cast(instruction_address() + 12 * 4); +- } +- +-public: +- int get_value() { +- return Atomic::load_acquire(guard_addr()); +- } +- +- void set_value(int value) { +- Atomic::release_store(guard_addr(), value); +- } +- +- void verify() const; +-}; +- +-// Store the instruction bitmask, bits and name for checking the barrier. +-struct CheckInsn { +- uint32_t mask; +- uint32_t bits; +- const char *name; +-}; +- +-static const struct CheckInsn barrierInsn[] = { +- { 0x00000fff, 0x00000297, "auipc t0, 0 "}, +- { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, +- { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, +- { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, +- { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, +- { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, +- { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, +- { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, +- { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, +- { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, +- { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, +- { 0x00000fff, 0x0000006f, "j skip "} +- /* guard: */ +- /* 32bit nmethod guard value */ +- /* skip: */ +-}; +- +-// The encodings must match the instructions emitted by +-// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific +-// register numbers and immediate values in the encoding. +-void NativeNMethodBarrier::verify() const { +- intptr_t addr = (intptr_t) instruction_address(); +- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { +- uint32_t inst = *((uint32_t*) addr); +- if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { +- tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); +- fatal("not an %s instruction.", barrierInsn[i].name); +- } +- addr += 4; +- } +-} +- +- +-/* We're called from an nmethod when we need to deoptimize it. We do +- this by throwing away the nmethod's frame and jumping to the +- ic_miss stub. This looks like there has been an IC miss at the +- entry of the nmethod, so we resolve the call, which will fall back +- to the interpreter if the nmethod has been unloaded. 
*/ +-void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { +- +- typedef struct { +- intptr_t *sp; intptr_t *fp; address ra; address pc; +- } frame_pointers_t; +- +- frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); +- +- JavaThread *thread = JavaThread::current(); +- RegisterMap reg_map(thread, false); +- frame frame = thread->last_frame(); +- +- assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); +- assert(frame.cb() == nm, "must be"); +- frame = frame.sender(®_map); +- +- LogTarget(Trace, nmethod, barrier) out; +- if (out.is_enabled()) { +- ResourceMark mark; +- log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", +- nm->method()->name_and_sig_as_C_string(), +- nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, +- thread->name(), frame.sp(), nm->verified_entry_point()); +- } +- +- new_frame->sp = frame.sp(); +- new_frame->fp = frame.fp(); +- new_frame->ra = frame.pc(); +- new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +-} +- +-// This is the offset of the entry barrier from where the frame is completed. +-// If any code changes between the end of the verified entry where the entry +-// barrier resides, and the completion of the frame, then +-// NativeNMethodCmpBarrier::verify() will immediately complain when it does +-// not find the expected native instruction at this offset, which needs updating. +-// Note that this offset is invariant of PreserveFramePointer. +- +-// see BarrierSetAssembler::nmethod_entry_barrier +-// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 +-static const int entry_barrier_offset = -4 * 13; +- +-static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { +- address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; +- NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); +- debug_only(barrier->verify()); +- return barrier; +-} +- +-void BarrierSetNMethod::disarm(nmethod* nm) { +- if (!supports_entry_barrier(nm)) { +- return; +- } +- +- // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. +- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); +- +- barrier->set_value(disarmed_value()); +-} +- +-bool BarrierSetNMethod::is_armed(nmethod* nm) { +- if (!supports_entry_barrier(nm)) { +- return false; +- } +- +- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); +- return barrier->get_value() != disarmed_value(); +-} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 41a415ef2cf..a75bd9dfa89 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1638,10 +1638,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, + beq(trial_klass, tmp, L); + } + +-// Move an oop into a register. immediate is true if we want +-// immediate instructions and nmethod entry barriers are not enabled. +-// i.e. we are not going to patch this instruction while the code is being +-// executed by another thread. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
+ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { +@@ -1656,11 +1656,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); +- +- // nmethod entry barrier necessitate using the constant pool. They have to be +- // ordered with respected to oop access. +- // Using immediate literals would necessitate fence.i. +- if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ if (!immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ld_constant(dst, Address(dummy, rspec)); + } else +@@ -1738,22 +1734,6 @@ void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); + } + +-// ((WeakHandle)result).resolve() +-void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { +- assert_different_registers(result, tmp); +- Label resolved; +- +- // A null weak handle resolves to null. +- beqz(result, resolved); +- +- // Only 64 bit platforms support GCs that require a tmp register +- // Only IN_HEAP loads require a thread_tmp register +- // WeakHandle::resolve is an indirection like jweak. +- access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, +- result, Address(result), tmp, noreg /* tmp_thread */); +- bind(resolved); +-} +- + void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { +@@ -3195,11 +3175,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + beq(src1, t0, equal); + } + +-void MacroAssembler::load_method_holder_cld(Register result, Register method) { +- load_method_holder(result, method); +- ld(result, Address(result, InstanceKlass::class_loader_data_offset())); +-} +- + void MacroAssembler::load_method_holder(Register holder, Register method) { + ld(holder, Address(method, Method::const_offset())); // ConstMethod* + ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index dd39f67d507..b16fe904888 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -207,7 +207,6 @@ class MacroAssembler: public Assembler { + virtual void check_and_handle_earlyret(Register java_thread); + virtual void check_and_handle_popframe(Register java_thread); + +- void resolve_weak_handle(Register result, Register tmp); + void resolve_oop_handle(Register result, Register tmp = x15); + void resolve_jobject(Register value, Register thread, Register tmp); + +@@ -673,7 +672,6 @@ class MacroAssembler: public Assembler { + void cmpptr(Register src1, Address src2, Label& equal); + + void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); +- void load_method_holder_cld(Register result, Register method); + void load_method_holder(Register holder, Register method); + + void compute_index(Register str1, Register trailing_zeros, Register match_mask, +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +index 228a64eae2c..047ea2276ca 100644 +--- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -41,7 +41,6 @@ 
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + switch (type()) { + case relocInfo::oop_type: { + oop_Relocation *reloc = (oop_Relocation *)this; +- // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate + if (NativeInstruction::is_load_pc_relative_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 7ec76e72ff0..0a1838695e1 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1068,17 +1068,6 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print("sd ra, [sp, #%d]\n\t", - wordSize); + if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); +- +- if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { +- st->print("ld t0, [guard]\n\t"); +- st->print("membar LoadLoad\n\t"); +- st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); +- st->print("beq t0, t1, skip\n\t"); +- st->print("jalr #nmethod_entry_barrier_stub\n\t"); +- st->print("j skip\n\t"); +- st->print("guard: int\n\t"); +- st->print("skip:\n\t"); +- } + } + #endif + +@@ -1114,11 +1103,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + __ build_frame(framesize); + +- if (C->stub_function() == NULL) { +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->nmethod_entry_barrier(&_masm); +- } +- + if (VerifyStackAtCalls) { + Unimplemented(); + } +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 5b934b04e8e..326ba62fcb0 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -642,9 +642,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + c2i_no_clinit_check_entry = __ pc(); + } + +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->c2i_entry_barrier(masm); +- + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); +@@ -1290,10 +1287,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // -2 because return address is already present and so is saved fp + __ sub(sp, sp, stack_size - 2 * wordSize); + +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- assert_cond(bs != NULL); +- bs->nmethod_entry_barrier(masm); +- + // Frame is now completed as far as size and linkage. 
+ int frame_complete = ((intptr_t)__ pc()) - start; + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 0c5b0e001ee..74c38c3d044 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2352,50 +2352,6 @@ class StubGenerator: public StubCodeGenerator { + return entry; + } + +- address generate_method_entry_barrier() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); +- +- Label deoptimize_label; +- +- address start = __ pc(); +- +- __ set_last_Java_frame(sp, fp, ra, t0); +- +- __ enter(); +- __ add(t1, sp, wordSize); +- +- __ sub(sp, sp, 4 * wordSize); +- +- __ push_call_clobbered_registers(); +- +- __ mv(c_rarg0, t1); +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); +- +- __ reset_last_Java_frame(true); +- +- __ mv(t0, x10); +- +- __ pop_call_clobbered_registers(); +- +- __ bnez(t0, deoptimize_label); +- +- __ leave(); +- __ ret(); +- +- __ BIND(deoptimize_label); +- +- __ ld(t0, Address(sp, 0)); +- __ ld(fp, Address(sp, wordSize)); +- __ ld(ra, Address(sp, wordSize * 2)); +- __ ld(t1, Address(sp, wordSize * 3)); +- +- __ mv(sp, t0); +- __ jr(t1); +- +- return start; +- } +- + // x10 = result + // x11 = str1 + // x12 = cnt1 +@@ -3703,11 +3659,6 @@ class StubGenerator: public StubCodeGenerator { + + generate_string_indexof_stubs(); + +- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); +- if (bs_nm != NULL) { +- StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); +- } +- + StubRoutines::riscv::set_completed(); + } + +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +index 395a2d338e4..9202d9ec4b0 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -53,6 +53,5 @@ address StubRoutines::riscv::_string_indexof_linear_ll = NULL; + address StubRoutines::riscv::_string_indexof_linear_uu = NULL; + address StubRoutines::riscv::_string_indexof_linear_ul = NULL; + address StubRoutines::riscv::_large_byte_array_inflate = NULL; +-address StubRoutines::riscv::_method_entry_barrier = NULL; + + bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +index 51f07819c33..0c9445e18a7 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -67,8 +67,6 @@ class riscv { + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; + +- static address _method_entry_barrier; +- + static bool _completed; + + public: +@@ -145,10 +143,6 @@ class riscv { + return _large_byte_array_inflate; + } + +- static address method_entry_barrier() { +- return _method_entry_barrier; +- } +- + static bool complete() { + return _completed; + } + +From aee31440dde84c54449b5c0dbdfb43b4d3826f5a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 17:59:40 +0800 +Subject: [PATCH 088/140] Revert JDK-8223173: Implement fast class + initialization checks on AARCH64 && JDK-8227260: JNI upcalls should bypass + class initialization barrier in c2i adapter + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ------- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 12 +++---- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 12 ------- + 
src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 2 -- + .../cpu/riscv/macroAssembler_riscv.cpp | 36 ------------------- + .../cpu/riscv/macroAssembler_riscv.hpp | 3 -- + src/hotspot/cpu/riscv/riscv.ad | 11 ------ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 30 +--------------- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 17 +++------ + 9 files changed, 11 insertions(+), 124 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 49653d04d81..1e482d7cc2b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -90,18 +90,6 @@ static void select_different_registers(Register preserve, + + bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +-void LIR_Assembler::clinit_barrier(ciMethod* method) { +- assert(VM_Version::supports_fast_class_init_checks(), "sanity"); +- assert(!method->holder()->is_not_initialized(), "initialization should have been started"); +- +- Label L_skip_barrier; +- +- __ mov_metadata(t1, method->holder()->constant_encoding()); +- __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(L_skip_barrier); +-} +- + LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; + } +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index a6d1b1470f9..99d981f97f4 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -317,6 +317,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L + } + + void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. +@@ -330,12 +336,6 @@ void C1_MacroAssembler::remove_frame(int framesize) { + + + void C1_MacroAssembler::verified_entry() { +- // If we have to make this method not-entrant we'll overwrite its +- // first instruction with a jump. For this action to be legal we +- // must ensure that this first instruction is a J, JAL or NOP. +- // Make it a NOP. +- +- nop(); + } + + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 1133e80a210..b50be7e726c 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -295,18 +295,6 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( + ld(klass, Address(klass, Array::base_offset_in_bytes())); + } + +-void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, +- Register method, +- Register cache) { +- const int method_offset = in_bytes( +- ConstantPoolCache::base_offset() + +- ((byte_no == TemplateTable::f2_byte) +- ? 
ConstantPoolCacheEntry::f2_offset() +- : ConstantPoolCacheEntry::f1_offset())); +- +- ld(method, Address(cache, method_offset)); // get f1 Method* +-} +- + // Generate a subtype check: branch to ok_is_subtype if sub_klass is a + // subtype of super_klass. + // +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +index 4d8cb086f82..4126e8ee70f 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { + // Load cpool->resolved_klass_at(index). + void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + +- void load_resolved_method_at_index(int byte_no, Register method, Register cache); +- + void pop_ptr(Register r = x10); + void pop_i(Register r = x10); + void pop_l(Register r = x10); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index a75bd9dfa89..304b6f2b06c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -372,36 +372,6 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); + } + +-void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { +- assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); +- assert_different_registers(klass, xthread, tmp); +- +- Label L_fallthrough, L_tmp; +- if (L_fast_path == NULL) { +- L_fast_path = &L_fallthrough; +- } else if (L_slow_path == NULL) { +- L_slow_path = &L_fallthrough; +- } +- +- // Fast path check: class is fully initialized +- lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); +- sub(tmp, tmp, InstanceKlass::fully_initialized); +- beqz(tmp, *L_fast_path); +- +- // Fast path check: current thread is initializer thread +- ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); +- +- if (L_slow_path == &L_fallthrough) { +- beq(xthread, tmp, *L_fast_path); +- bind(*L_slow_path); +- } else if (L_fast_path == &L_fallthrough) { +- bne(xthread, tmp, *L_slow_path); +- bind(*L_fast_path); +- } else { +- Unimplemented(); +- } +-} +- + void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + +@@ -3175,12 +3145,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + beq(src1, t0, equal); + } + +-void MacroAssembler::load_method_holder(Register holder, Register method) { +- ld(holder, Address(method, Method::const_offset())); // ConstMethod* +- ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* +- ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +-} +- + // string indexof + // compute index by trailing zeros + void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b16fe904888..c6b71bdbc3c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -671,9 +671,6 @@ class MacroAssembler: public Assembler { + + void cmpptr(Register src1, Address src2, Label& equal); + +- void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); +- void 
load_method_holder(Register holder, Register method); +- + void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 0a1838695e1..13546ab328b 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1085,17 +1085,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + assert_cond(C != NULL); + +- if (C->clinit_barrier_on_entry()) { +- assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); +- +- Label L_skip_barrier; +- +- __ mov_metadata(t1, C->method()->holder()->constant_encoding()); +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(L_skip_barrier); +- } +- + int bangsize = C->output()->bang_size_in_bytes(); + if (C->output()->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 326ba62fcb0..ae414224c5b 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -623,29 +623,10 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + + address c2i_entry = __ pc(); + +- // Class initialization barrier for static methods +- address c2i_no_clinit_check_entry = NULL; +- if (VM_Version::supports_fast_class_init_checks()) { +- Label L_skip_barrier; +- +- { // Bypass the barrier for non-static methods +- __ lwu(t0, Address(xmethod, Method::access_flags_offset())); +- __ andi(t1, t0, JVM_ACC_STATIC); +- __ beqz(t1, L_skip_barrier); // non-static +- } +- +- __ load_method_holder(t1, xmethod); +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- +- __ bind(L_skip_barrier); +- c2i_no_clinit_check_entry = __ pc(); +- } +- + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); +- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + } + + int SharedRuntime::c_calling_convention(const BasicType *sig_bt, +@@ -1270,15 +1251,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // first instruction with a jump. 
+ __ nop(); + +- if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { +- Label L_skip_barrier; +- __ mov_metadata(t1, method->method_holder()); // InstanceKlass* +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- +- __ bind(L_skip_barrier); +- } +- + // Generate stack overflow check + __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index bb20f228447..1f4409a9c9a 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -2307,7 +2307,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + +- Label resolved, clinit_barrier_slow; ++ Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { +@@ -2321,10 +2321,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + +- // resolve first time through +- // Class initialization barrier slow path lands here as well. +- __ bind(clinit_barrier_slow); +- + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); +@@ -2334,13 +2330,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); +- +- // Class initialization barrier for static methods +- if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { +- __ load_resolved_method_at_index(byte_no, temp, Rcache); +- __ load_method_holder(temp, temp); +- __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); +- } + } + + // The Rcache and index registers must be set before call +@@ -3431,7 +3420,9 @@ void TemplateTable::invokeinterface(int byte_no) { + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index +- __ load_method_holder(x10, xmethod); ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); + +From c259a42eac0a11e080d28dabe7f745ee79a53663 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 18:36:13 +0800 +Subject: [PATCH 089/140] Revert JDK-8268119: Rename copy_os_cpu.inline.hpp + files to copy_os_cpu.hpp && JDK-8142362: Lots of code duplication in Copy + class + +--- + src/hotspot/cpu/riscv/copy_riscv.hpp | 85 +----------- + .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 ----- + .../linux_riscv/copy_linux_riscv.inline.hpp | 124 ++++++++++++++++++ + 3 files changed, 128 insertions(+), 112 deletions(-) + delete mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +index bceadcc5dcc..05da242e354 100644 +--- a/src/hotspot/cpu/riscv/copy_riscv.hpp ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -27,7 +27,10 @@ + #ifndef 
CPU_RISCV_COPY_RISCV_HPP + #define CPU_RISCV_COPY_RISCV_HPP + +-#include OS_CPU_HEADER(copy) ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) + + static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; +@@ -53,84 +56,4 @@ static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); + } + +-static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- (void)memmove(to, from, count * HeapWordSize); +-} +- +-static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- switch (count) { +- case 8: to[7] = from[7]; // fall through +- case 7: to[6] = from[6]; // fall through +- case 6: to[5] = from[5]; // fall through +- case 5: to[4] = from[4]; // fall through +- case 4: to[3] = from[3]; // fall through +- case 3: to[2] = from[2]; // fall through +- case 2: to[1] = from[1]; // fall through +- case 1: to[0] = from[0]; // fall through +- case 0: break; +- default: +- memcpy(to, from, count * HeapWordSize); +- break; +- } +-} +- +-static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { +- shared_disjoint_words_atomic(from, to, count); +-} +- +-static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- pd_conjoint_words(from, to, count); +-} +- +-static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- pd_disjoint_words(from, to, count); +-} +- +-static void pd_conjoint_bytes(const void* from, void* to, size_t count) { +- (void)memmove(to, from, count); +-} +- +-static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { +- pd_conjoint_bytes(from, to, count); +-} +- +-static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { +- _Copy_conjoint_jshorts_atomic(from, to, count); +-} +- +-static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { +- _Copy_conjoint_jints_atomic(from, to, count); +-} +- +-static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { +- _Copy_conjoint_jlongs_atomic(from, to, count); +-} +- +-static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { +- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); +- _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); +-} +- +-static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_bytes(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jshorts(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jints(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jlongs(from, to, count); +-} +- +-static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { +- assert(!UseCompressedOops, "foo!"); +- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); +- _Copy_arrayof_conjoint_jlongs(from, to, count); +-} +- + #endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +deleted file mode 100644 +index 
147cfdf3c10..00000000000 +--- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +-#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +- +-// Empty for build system +- +-#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +new file mode 100644 +index 00000000000..bdf36d6b4c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP + +From 6033e30ebd94f2315bf809a42ef00c85bdbc780e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 19:33:21 +0800 +Subject: [PATCH 090/140] 
Revert JDK-8241436: C2: Factor out C2-specific code + from MacroAssembler + +--- + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1321 ----------------- + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 141 -- + .../cpu/riscv/macroAssembler_riscv.cpp | 1282 ++++++++++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 103 ++ + src/hotspot/cpu/riscv/riscv.ad | 124 +- + 5 files changed, 1447 insertions(+), 1524 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +deleted file mode 100644 +index 73f84a724ca..00000000000 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ /dev/null +@@ -1,1321 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/assembler.hpp" +-#include "asm/assembler.inline.hpp" +-#include "opto/c2_MacroAssembler.hpp" +-#include "opto/intrinsicnode.hpp" +-#include "opto/subnode.hpp" +-#include "runtime/stubRoutines.hpp" +- +-#ifdef PRODUCT +-#define BLOCK_COMMENT(str) /* nothing */ +-#define STOP(error) stop(error) +-#else +-#define BLOCK_COMMENT(str) block_comment(str) +-#define STOP(error) block_comment(error); stop(error) +-#endif +- +-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +- +-// short string +-// StringUTF16.indexOfChar +-// StringLatin1.indexOfChar +-void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, +- Register ch, Register result, +- bool isL) +-{ +- Register ch1 = t0; +- Register index = t1; +- +- BLOCK_COMMENT("string_indexof_char_short {"); +- +- Label LOOP, LOOP1, LOOP4, LOOP8; +- Label MATCH, MATCH1, MATCH2, MATCH3, +- MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; +- +- mv(result, -1); +- mv(index, zr); +- +- bind(LOOP); +- addi(t0, index, 8); +- ble(t0, cnt1, LOOP8); +- addi(t0, index, 4); +- ble(t0, cnt1, LOOP4); +- j(LOOP1); +- +- bind(LOOP8); +- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); +- beq(ch, ch1, MATCH); +- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); +- beq(ch, ch1, MATCH1); +- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); +- beq(ch, ch1, MATCH2); +- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); +- beq(ch, ch1, MATCH3); +- isL ? 
lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); +- beq(ch, ch1, MATCH4); +- isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); +- beq(ch, ch1, MATCH5); +- isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); +- beq(ch, ch1, MATCH6); +- isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); +- beq(ch, ch1, MATCH7); +- addi(index, index, 8); +- addi(str1, str1, isL ? 8 : 16); +- blt(index, cnt1, LOOP); +- j(NOMATCH); +- +- bind(LOOP4); +- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); +- beq(ch, ch1, MATCH); +- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); +- beq(ch, ch1, MATCH1); +- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); +- beq(ch, ch1, MATCH2); +- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); +- beq(ch, ch1, MATCH3); +- addi(index, index, 4); +- addi(str1, str1, isL ? 4 : 8); +- bge(index, cnt1, NOMATCH); +- +- bind(LOOP1); +- isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); +- beq(ch, ch1, MATCH); +- addi(index, index, 1); +- addi(str1, str1, isL ? 1 : 2); +- blt(index, cnt1, LOOP1); +- j(NOMATCH); +- +- bind(MATCH1); +- addi(index, index, 1); +- j(MATCH); +- +- bind(MATCH2); +- addi(index, index, 2); +- j(MATCH); +- +- bind(MATCH3); +- addi(index, index, 3); +- j(MATCH); +- +- bind(MATCH4); +- addi(index, index, 4); +- j(MATCH); +- +- bind(MATCH5); +- addi(index, index, 5); +- j(MATCH); +- +- bind(MATCH6); +- addi(index, index, 6); +- j(MATCH); +- +- bind(MATCH7); +- addi(index, index, 7); +- +- bind(MATCH); +- mv(result, index); +- bind(NOMATCH); +- BLOCK_COMMENT("} string_indexof_char_short"); +-} +- +-// StringUTF16.indexOfChar +-// StringLatin1.indexOfChar +-void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- bool isL) +-{ +- Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; +- Register ch1 = t0; +- Register orig_cnt = t1; +- Register mask1 = tmp3; +- Register mask2 = tmp2; +- Register match_mask = tmp1; +- Register trailing_char = tmp4; +- Register unaligned_elems = tmp4; +- +- BLOCK_COMMENT("string_indexof_char {"); +- beqz(cnt1, NOMATCH); +- +- addi(t0, cnt1, isL ? -32 : -16); +- bgtz(t0, DO_LONG); +- string_indexof_char_short(str1, cnt1, ch, result, isL); +- j(DONE); +- +- bind(DO_LONG); +- mv(orig_cnt, cnt1); +- if (AvoidUnalignedAccesses) { +- Label ALIGNED; +- andi(unaligned_elems, str1, 0x7); +- beqz(unaligned_elems, ALIGNED); +- sub(unaligned_elems, unaligned_elems, 8); +- neg(unaligned_elems, unaligned_elems); +- if (!isL) { +- srli(unaligned_elems, unaligned_elems, 1); +- } +- // do unaligned part per element +- string_indexof_char_short(str1, unaligned_elems, ch, result, isL); +- bgez(result, DONE); +- mv(orig_cnt, cnt1); +- sub(cnt1, cnt1, unaligned_elems); +- bind(ALIGNED); +- } +- +- // duplicate ch +- if (isL) { +- slli(ch1, ch, 8); +- orr(ch, ch1, ch); +- } +- slli(ch1, ch, 16); +- orr(ch, ch1, ch); +- slli(ch1, ch, 32); +- orr(ch, ch1, ch); +- +- if (!isL) { +- slli(cnt1, cnt1, 1); +- } +- +- uint64_t mask0101 = UCONST64(0x0101010101010101); +- uint64_t mask0001 = UCONST64(0x0001000100010001); +- mv(mask1, isL ? mask0101 : mask0001); +- uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); +- uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); +- mv(mask2, isL ? 
mask7f7f : mask7fff); +- +- bind(CH1_LOOP); +- ld(ch1, Address(str1)); +- addi(str1, str1, 8); +- addi(cnt1, cnt1, -8); +- compute_match_mask(ch1, ch, match_mask, mask1, mask2); +- bnez(match_mask, HIT); +- bgtz(cnt1, CH1_LOOP); +- j(NOMATCH); +- +- bind(HIT); +- ctzc_bit(trailing_char, match_mask, isL, ch1, result); +- srli(trailing_char, trailing_char, 3); +- addi(cnt1, cnt1, 8); +- ble(cnt1, trailing_char, NOMATCH); +- // match case +- if (!isL) { +- srli(cnt1, cnt1, 1); +- srli(trailing_char, trailing_char, 1); +- } +- +- sub(result, orig_cnt, cnt1); +- add(result, result, trailing_char); +- j(DONE); +- +- bind(NOMATCH); +- mv(result, -1); +- +- bind(DONE); +- BLOCK_COMMENT("} string_indexof_char"); +-} +- +-typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); +- +-// Search for needle in haystack and return index or -1 +-// x10: result +-// x11: haystack +-// x12: haystack_len +-// x13: needle +-// x14: needle_len +-void C2_MacroAssembler::string_indexof(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, int ae) +-{ +- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); +- +- Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; +- +- Register ch1 = t0; +- Register ch2 = t1; +- Register nlen_tmp = tmp1; // needle len tmp +- Register hlen_tmp = tmp2; // haystack len tmp +- Register result_tmp = tmp4; +- +- bool isLL = ae == StrIntrinsicNode::LL; +- +- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; +- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; +- int needle_chr_shift = needle_isL ? 0 : 1; +- int haystack_chr_shift = haystack_isL ? 0 : 1; +- int needle_chr_size = needle_isL ? 1 : 2; +- int haystack_chr_size = haystack_isL ? 1 : 2; +- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- +- BLOCK_COMMENT("string_indexof {"); +- +- // Note, inline_string_indexOf() generates checks: +- // if (pattern.count > src.count) return -1; +- // if (pattern.count == 0) return 0; +- +- // We have two strings, a source string in haystack, haystack_len and a pattern string +- // in needle, needle_len. Find the first occurence of pattern in source or return -1. +- +- // For larger pattern and source we use a simplified Boyer Moore algorithm. +- // With a small pattern and source we use linear scan. +- +- // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. +- sub(result_tmp, haystack_len, needle_len); +- // needle_len < 8, use linear scan +- sub(t0, needle_len, 8); +- bltz(t0, LINEARSEARCH); +- // needle_len >= 256, use linear scan +- sub(t0, needle_len, 256); +- bgez(t0, LINEARSTUB); +- // needle_len >= haystack_len/4, use linear scan +- srli(t0, haystack_len, 2); +- bge(needle_len, t0, LINEARSTUB); +- +- // Boyer-Moore-Horspool introduction: +- // The Boyer Moore alogorithm is based on the description here:- +- // +- // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm +- // +- // This describes and algorithm with 2 shift rules. The 'Bad Character' rule +- // and the 'Good Suffix' rule. 
+- // +- // These rules are essentially heuristics for how far we can shift the +- // pattern along the search string. +- // +- // The implementation here uses the 'Bad Character' rule only because of the +- // complexity of initialisation for the 'Good Suffix' rule. +- // +- // This is also known as the Boyer-Moore-Horspool algorithm: +- // +- // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm +- // +- // #define ASIZE 256 +- // +- // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { +- // int i, j; +- // unsigned c; +- // unsigned char bc[ASIZE]; +- // +- // /* Preprocessing */ +- // for (i = 0; i < ASIZE; ++i) +- // bc[i] = m; +- // for (i = 0; i < m - 1; ) { +- // c = pattern[i]; +- // ++i; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef PATTERN_STRING_IS_LATIN1 +- // bc[c] = m - i; +- // #else +- // if (c < ASIZE) bc[c] = m - i; +- // #endif +- // } +- // +- // /* Searching */ +- // j = 0; +- // while (j <= n - m) { +- // c = src[i+j]; +- // if (pattern[m-1] == c) +- // int k; +- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); +- // if (k < 0) return j; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 +- // // LL case: (c< 256) always true. Remove branch +- // j += bc[pattern[j+m-1]]; +- // #endif +- // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF +- // // UU case: need if (c if not. +- // if (c < ASIZE) +- // j += bc[pattern[j+m-1]]; +- // else +- // j += m +- // #endif +- // } +- // return -1; +- // } +- +- // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result +- Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, +- BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; +- +- Register haystack_end = haystack_len; +- Register skipch = tmp2; +- +- // pattern length is >=8, so, we can read at least 1 register for cases when +- // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for +- // UL case. We'll re-read last character in inner pre-loop code to have +- // single outer pre-loop load +- const int firstStep = isLL ? 
7 : 3; +- +- const int ASIZE = 256; +- const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) +- +- sub(sp, sp, ASIZE); +- +- // init BC offset table with default value: needle_len +- slli(t0, needle_len, 8); +- orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] +- slli(tmp1, t0, 16); +- orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] +- slli(tmp1, t0, 32); +- orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] +- +- mv(ch1, sp); // ch1 is t0 +- mv(tmp6, ASIZE / STORE_BYTES); // loop iterations +- +- bind(BM_INIT_LOOP); +- // for (i = 0; i < ASIZE; ++i) +- // bc[i] = m; +- for (int i = 0; i < 4; i++) { +- sd(tmp5, Address(ch1, i * wordSize)); +- } +- add(ch1, ch1, 32); +- sub(tmp6, tmp6, 4); +- bgtz(tmp6, BM_INIT_LOOP); +- +- sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern +- Register orig_haystack = tmp5; +- mv(orig_haystack, haystack); +- // result_tmp = tmp4 +- shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); +- sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 +- mv(tmp3, needle); +- +- // for (i = 0; i < m - 1; ) { +- // c = pattern[i]; +- // ++i; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef PATTERN_STRING_IS_LATIN1 +- // bc[c] = m - i; +- // #else +- // if (c < ASIZE) bc[c] = m - i; +- // #endif +- // } +- bind(BCLOOP); +- (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); +- add(tmp3, tmp3, needle_chr_size); +- if (!needle_isL) { +- // ae == StrIntrinsicNode::UU +- mv(tmp6, ASIZE); +- bgeu(ch1, tmp6, BCSKIP); +- } +- add(tmp4, sp, ch1); +- sb(ch2, Address(tmp4)); // store skip offset to BC offset table +- +- bind(BCSKIP); +- sub(ch2, ch2, 1); // for next pattern element, skip distance -1 +- bgtz(ch2, BCLOOP); +- +- // tmp6: pattern end, address after needle +- shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); +- if (needle_isL == haystack_isL) { +- // load last 8 bytes (8LL/4UU symbols) +- ld(tmp6, Address(tmp6, -wordSize)); +- } else { +- // UL: from UTF-16(source) search Latin1(pattern) +- lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) +- // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d +- // We'll have to wait until load completed, but it's still faster than per-character loads+checks +- srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a +- slli(ch2, tmp6, XLEN - 24); +- srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b +- slli(ch1, tmp6, XLEN - 16); +- srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c +- andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d +- slli(ch2, ch2, 16); +- orr(ch2, ch2, ch1); // 0x00000b0c +- slli(result, tmp3, 48); // use result as temp register +- orr(tmp6, tmp6, result); // 0x0a00000d +- slli(result, ch2, 16); +- orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d +- } +- +- // i = m - 1; +- // skipch = j + i; +- // if (skipch == pattern[m - 1] +- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); +- // else +- // move j with bad char offset table +- bind(BMLOOPSTR2); +- // compare pattern to source string backward +- shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); +- (this->*haystack_load_1chr)(skipch, Address(result), noreg); +- sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 +- if (needle_isL == haystack_isL) { +- // re-init tmp3. It's for free because it's executed in parallel with +- // load above. 
Alternative is to initialize it before loop, but it'll +- // affect performance on in-order systems with 2 or more ld/st pipelines +- srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] +- } +- if (!isLL) { // UU/UL case +- slli(ch2, nlen_tmp, 1); // offsets in bytes +- } +- bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char +- add(result, haystack, isLL ? nlen_tmp : ch2); +- ld(ch2, Address(result)); // load 8 bytes from source string +- mv(ch1, tmp6); +- if (isLL) { +- j(BMLOOPSTR1_AFTER_LOAD); +- } else { +- sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 +- j(BMLOOPSTR1_CMP); +- } +- +- bind(BMLOOPSTR1); +- shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); +- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); +- shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- +- bind(BMLOOPSTR1_AFTER_LOAD); +- sub(nlen_tmp, nlen_tmp, 1); +- bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); +- +- bind(BMLOOPSTR1_CMP); +- beq(ch1, ch2, BMLOOPSTR1); +- +- bind(BMSKIP); +- if (!isLL) { +- // if we've met UTF symbol while searching Latin1 pattern, then we can +- // skip needle_len symbols +- if (needle_isL != haystack_isL) { +- mv(result_tmp, needle_len); +- } else { +- mv(result_tmp, 1); +- } +- mv(t0, ASIZE); +- bgeu(skipch, t0, BMADV); +- } +- add(result_tmp, sp, skipch); +- lbu(result_tmp, Address(result_tmp)); // load skip offset +- +- bind(BMADV); +- sub(nlen_tmp, needle_len, 1); +- // move haystack after bad char skip offset +- shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); +- ble(haystack, haystack_end, BMLOOPSTR2); +- add(sp, sp, ASIZE); +- j(NOMATCH); +- +- bind(BMLOOPSTR1_LASTCMP); +- bne(ch1, ch2, BMSKIP); +- +- bind(BMMATCH); +- sub(result, haystack, orig_haystack); +- if (!haystack_isL) { +- srli(result, result, 1); +- } +- add(sp, sp, ASIZE); +- j(DONE); +- +- bind(LINEARSTUB); +- sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm +- bltz(t0, LINEARSEARCH); +- mv(result, zr); +- RuntimeAddress stub = NULL; +- if (isLL) { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); +- assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); +- } else if (needle_isL) { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); +- assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); +- } else { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); +- assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); +- } +- trampoline_call(stub); +- j(DONE); +- +- bind(NOMATCH); +- mv(result, -1); +- j(DONE); +- +- bind(LINEARSEARCH); +- string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); +- +- bind(DONE); +- BLOCK_COMMENT("} string_indexof"); +-} +- +-// string_indexof +-// result: x10 +-// src: x11 +-// src_count: x12 +-// pattern: x13 +-// pattern_count: x14 or 1/2/3/4 +-void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- int needle_con_cnt, Register result, int ae) +-{ +- // Note: +- // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant +- // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 +- assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); +- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); +- +- Register ch1 = t0; +- Register ch2 = t1; +- Register hlen_neg = haystack_len, nlen_neg = needle_len; +- Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; +- +- bool isLL = ae == StrIntrinsicNode::LL; +- +- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; +- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; +- int needle_chr_shift = needle_isL ? 0 : 1; +- int haystack_chr_shift = haystack_isL ? 0 : 1; +- int needle_chr_size = needle_isL ? 1 : 2; +- int haystack_chr_size = haystack_isL ? 1 : 2; +- +- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; +- load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; +- +- Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; +- +- Register first = tmp3; +- +- if (needle_con_cnt == -1) { +- Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; +- +- sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); +- bltz(t0, DOSHORT); +- +- (this->*needle_load_1chr)(first, Address(needle), noreg); +- slli(t0, needle_len, needle_chr_shift); +- add(needle, needle, t0); +- neg(nlen_neg, t0); +- slli(t0, result_tmp, haystack_chr_shift); +- add(haystack, haystack, t0); +- neg(hlen_neg, t0); +- +- bind(FIRST_LOOP); +- add(t0, haystack, hlen_neg); +- (this->*haystack_load_1chr)(ch2, Address(t0), noreg); +- beq(first, ch2, STR1_LOOP); +- +- bind(STR2_NEXT); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, FIRST_LOOP); +- j(NOMATCH); +- +- bind(STR1_LOOP); +- add(nlen_tmp, nlen_neg, needle_chr_size); +- add(hlen_tmp, hlen_neg, haystack_chr_size); +- bgez(nlen_tmp, MATCH); +- +- bind(STR1_NEXT); +- add(ch1, needle, nlen_tmp); +- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); +- add(ch2, haystack, hlen_tmp); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- bne(ch1, ch2, STR2_NEXT); +- add(nlen_tmp, nlen_tmp, needle_chr_size); +- add(hlen_tmp, hlen_tmp, haystack_chr_size); +- bltz(nlen_tmp, STR1_NEXT); +- j(MATCH); +- +- bind(DOSHORT); +- if (needle_isL == haystack_isL) { +- sub(t0, needle_len, 2); +- bltz(t0, DO1); +- bgtz(t0, DO3); +- } +- } +- +- if (needle_con_cnt == 4) { +- Label CH1_LOOP; +- (this->*load_4chr)(ch1, Address(needle), noreg); +- sub(result_tmp, haystack_len, 4); +- slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(CH1_LOOP); +- add(ch2, haystack, hlen_neg); +- (this->*load_4chr)(ch2, Address(ch2), noreg); +- beq(ch1, ch2, MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, CH1_LOOP); +- j(NOMATCH); +- } +- +- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { +- Label CH1_LOOP; +- BLOCK_COMMENT("string_indexof DO2 {"); +- bind(DO2); +- (this->*load_2chr)(ch1, Address(needle), noreg); +- if (needle_con_cnt == 2) { +- sub(result_tmp, haystack_len, 2); +- } +- slli(tmp3, result_tmp, haystack_chr_shift); +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(CH1_LOOP); +- add(tmp3, haystack, hlen_neg); +- (this->*load_2chr)(ch2, Address(tmp3), noreg); +- beq(ch1, ch2, 
MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, CH1_LOOP); +- j(NOMATCH); +- BLOCK_COMMENT("} string_indexof DO2"); +- } +- +- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { +- Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; +- BLOCK_COMMENT("string_indexof DO3 {"); +- +- bind(DO3); +- (this->*load_2chr)(first, Address(needle), noreg); +- (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); +- if (needle_con_cnt == 3) { +- sub(result_tmp, haystack_len, 3); +- } +- slli(hlen_tmp, result_tmp, haystack_chr_shift); +- add(haystack, haystack, hlen_tmp); +- neg(hlen_neg, hlen_tmp); +- +- bind(FIRST_LOOP); +- add(ch2, haystack, hlen_neg); +- (this->*load_2chr)(ch2, Address(ch2), noreg); +- beq(first, ch2, STR1_LOOP); +- +- bind(STR2_NEXT); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, FIRST_LOOP); +- j(NOMATCH); +- +- bind(STR1_LOOP); +- add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); +- add(ch2, haystack, hlen_tmp); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- bne(ch1, ch2, STR2_NEXT); +- j(MATCH); +- BLOCK_COMMENT("} string_indexof DO3"); +- } +- +- if (needle_con_cnt == -1 || needle_con_cnt == 1) { +- Label DO1_LOOP; +- +- BLOCK_COMMENT("string_indexof DO1 {"); +- bind(DO1); +- (this->*needle_load_1chr)(ch1, Address(needle), noreg); +- sub(result_tmp, haystack_len, 1); +- mv(tmp3, result_tmp); +- if (haystack_chr_shift) { +- slli(tmp3, result_tmp, haystack_chr_shift); +- } +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(DO1_LOOP); +- add(tmp3, haystack, hlen_neg); +- (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); +- beq(ch1, ch2, MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, DO1_LOOP); +- BLOCK_COMMENT("} string_indexof DO1"); +- } +- +- bind(NOMATCH); +- mv(result, -1); +- j(DONE); +- +- bind(MATCH); +- srai(t0, hlen_neg, haystack_chr_shift); +- add(result, result_tmp, t0); +- +- bind(DONE); +-} +- +-// Compare strings. +-void C2_MacroAssembler::string_compare(Register str1, Register str2, +- Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, +- Register tmp3, int ae) +-{ +- Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, +- DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, +- SHORT_LOOP_START, TAIL_CHECK, L; +- +- const int STUB_THRESHOLD = 64 + 8; +- bool isLL = ae == StrIntrinsicNode::LL; +- bool isLU = ae == StrIntrinsicNode::LU; +- bool isUL = ae == StrIntrinsicNode::UL; +- +- bool str1_isL = isLL || isLU; +- bool str2_isL = isLL || isUL; +- +- // for L strings, 1 byte for 1 character +- // for U strings, 2 bytes for 1 character +- int str1_chr_size = str1_isL ? 1 : 2; +- int str2_chr_size = str2_isL ? 1 : 2; +- int minCharsInWord = isLL ? wordSize : wordSize / 2; +- +- load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; +- +- BLOCK_COMMENT("string_compare {"); +- +- // Bizzarely, the counts are passed in bytes, regardless of whether they +- // are L or U strings, however the result is always in characters. +- if (!str1_isL) { +- sraiw(cnt1, cnt1, 1); +- } +- if (!str2_isL) { +- sraiw(cnt2, cnt2, 1); +- } +- +- // Compute the minimum of the string lengths and save the difference in result. 
+- sub(result, cnt1, cnt2); +- bgt(cnt1, cnt2, L); +- mv(cnt2, cnt1); +- bind(L); +- +- // A very short string +- li(t0, minCharsInWord); +- ble(cnt2, t0, SHORT_STRING); +- +- // Compare longwords +- // load first parts of strings and finish initialization while loading +- { +- if (str1_isL == str2_isL) { // LL or UU +- // load 8 bytes once to compare +- ld(tmp1, Address(str1)); +- beq(str1, str2, DONE); +- ld(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- sub(cnt2, cnt2, minCharsInWord); +- beqz(cnt2, TAIL_CHECK); +- // convert cnt2 from characters to bytes +- if (!str1_isL) { +- slli(cnt2, cnt2, 1); +- } +- add(str2, str2, cnt2); +- add(str1, str1, cnt2); +- sub(cnt2, zr, cnt2); +- } else if (isLU) { // LU case +- lwu(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- addi(cnt2, cnt2, -4); +- add(str1, str1, cnt2); +- sub(cnt1, zr, cnt2); +- slli(cnt2, cnt2, 1); +- add(str2, str2, cnt2); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- sub(cnt2, zr, cnt2); +- addi(cnt1, cnt1, 4); +- } else { // UL case +- ld(tmp1, Address(str1)); +- lwu(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- addi(cnt2, cnt2, -4); +- slli(t0, cnt2, 1); +- sub(cnt1, zr, t0); +- add(str1, str1, t0); +- add(str2, str2, cnt2); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- sub(cnt2, zr, cnt2); +- addi(cnt1, cnt1, 8); +- } +- addi(cnt2, cnt2, isUL ? 4 : 8); +- bgez(cnt2, TAIL); +- xorr(tmp3, tmp1, tmp2); +- bnez(tmp3, DIFFERENCE); +- +- // main loop +- bind(NEXT_WORD); +- if (str1_isL == str2_isL) { // LL or UU +- add(t0, str1, cnt2); +- ld(tmp1, Address(t0)); +- add(t0, str2, cnt2); +- ld(tmp2, Address(t0)); +- addi(cnt2, cnt2, 8); +- } else if (isLU) { // LU case +- add(t0, str1, cnt1); +- lwu(tmp1, Address(t0)); +- add(t0, str2, cnt2); +- ld(tmp2, Address(t0)); +- addi(cnt1, cnt1, 4); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- addi(cnt2, cnt2, 8); +- } else { // UL case +- add(t0, str2, cnt2); +- lwu(tmp2, Address(t0)); +- add(t0, str1, cnt1); +- ld(tmp1, Address(t0)); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- addi(cnt1, cnt1, 8); +- addi(cnt2, cnt2, 4); +- } +- bgez(cnt2, TAIL); +- +- xorr(tmp3, tmp1, tmp2); +- beqz(tmp3, NEXT_WORD); +- j(DIFFERENCE); +- bind(TAIL); +- xorr(tmp3, tmp1, tmp2); +- bnez(tmp3, DIFFERENCE); +- // Last longword. In the case where length == 4 we compare the +- // same longword twice, but that's still faster than another +- // conditional branch. +- if (str1_isL == str2_isL) { // LL or UU +- ld(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- } else if (isLU) { // LU case +- lwu(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- } else { // UL case +- lwu(tmp2, Address(str2)); +- ld(tmp1, Address(str1)); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- } +- bind(TAIL_CHECK); +- xorr(tmp3, tmp1, tmp2); +- beqz(tmp3, DONE); +- +- // Find the first different characters in the longwords and +- // compute their difference. 
+- bind(DIFFERENCE); +- ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb +- srl(tmp1, tmp1, result); +- srl(tmp2, tmp2, result); +- if (isLL) { +- andi(tmp1, tmp1, 0xFF); +- andi(tmp2, tmp2, 0xFF); +- } else { +- andi(tmp1, tmp1, 0xFFFF); +- andi(tmp2, tmp2, 0xFFFF); +- } +- sub(result, tmp1, tmp2); +- j(DONE); +- } +- +- bind(STUB); +- RuntimeAddress stub = NULL; +- switch (ae) { +- case StrIntrinsicNode::LL: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); +- break; +- case StrIntrinsicNode::UU: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); +- break; +- case StrIntrinsicNode::LU: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); +- break; +- case StrIntrinsicNode::UL: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); +- break; +- default: +- ShouldNotReachHere(); +- } +- assert(stub.target() != NULL, "compare_long_string stub has not been generated"); +- trampoline_call(stub); +- j(DONE); +- +- bind(SHORT_STRING); +- // Is the minimum length zero? +- beqz(cnt2, DONE); +- // arrange code to do most branches while loading and loading next characters +- // while comparing previous +- (this->*str1_load_chr)(tmp1, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST_INIT); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- j(SHORT_LOOP_START); +- bind(SHORT_LOOP); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST); +- bind(SHORT_LOOP_START); +- (this->*str1_load_chr)(tmp2, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- (this->*str2_load_chr)(t0, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- bne(tmp1, cnt1, SHORT_LOOP_TAIL); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST2); +- (this->*str1_load_chr)(tmp1, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- beq(tmp2, t0, SHORT_LOOP); +- sub(result, tmp2, t0); +- j(DONE); +- bind(SHORT_LOOP_TAIL); +- sub(result, tmp1, cnt1); +- j(DONE); +- bind(SHORT_LAST2); +- beq(tmp2, t0, DONE); +- sub(result, tmp2, t0); +- +- j(DONE); +- bind(SHORT_LAST_INIT); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- bind(SHORT_LAST); +- beq(tmp1, cnt1, DONE); +- sub(result, tmp1, cnt1); +- +- bind(DONE); +- +- BLOCK_COMMENT("} string_compare"); +-} +- +-void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, +- Register tmp4, Register tmp5, Register tmp6, Register result, +- Register cnt1, int elem_size) { +- Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; +- Register tmp1 = t0; +- Register tmp2 = t1; +- Register cnt2 = tmp2; // cnt2 only used in array length compare +- Register elem_per_word = tmp6; +- int log_elem_size = exact_log2(elem_size); +- int length_offset = arrayOopDesc::length_offset_in_bytes(); +- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); +- +- assert(elem_size == 1 || elem_size == 2, "must be char or byte"); +- assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); +- li(elem_per_word, wordSize / elem_size); +- +- BLOCK_COMMENT("arrays_equals {"); +- +- // if (a1 == a2), return true +- beq(a1, a2, SAME); +- +- mv(result, false); +- beqz(a1, DONE); +- beqz(a2, DONE); +- lwu(cnt1, Address(a1, length_offset)); +- lwu(cnt2, Address(a2, length_offset)); +- bne(cnt2, cnt1, DONE); +- beqz(cnt1, SAME); +- +- slli(tmp5, cnt1, 3 + log_elem_size); +- sub(tmp5, zr, tmp5); +- add(a1, a1, base_offset); +- add(a2, a2, base_offset); +- ld(tmp3, Address(a1, 0)); +- ld(tmp4, Address(a2, 0)); +- ble(cnt1, elem_per_word, SHORT); // short or same +- +- // Main 16 byte comparison loop with 2 exits +- bind(NEXT_DWORD); { +- ld(tmp1, Address(a1, wordSize)); +- ld(tmp2, Address(a2, wordSize)); +- sub(cnt1, cnt1, 2 * wordSize / elem_size); +- blez(cnt1, TAIL); +- bne(tmp3, tmp4, DONE); +- ld(tmp3, Address(a1, 2 * wordSize)); +- ld(tmp4, Address(a2, 2 * wordSize)); +- add(a1, a1, 2 * wordSize); +- add(a2, a2, 2 * wordSize); +- ble(cnt1, elem_per_word, TAIL2); +- } beq(tmp1, tmp2, NEXT_DWORD); +- j(DONE); +- +- bind(TAIL); +- xorr(tmp4, tmp3, tmp4); +- xorr(tmp2, tmp1, tmp2); +- sll(tmp2, tmp2, tmp5); +- orr(tmp5, tmp4, tmp2); +- j(IS_TMP5_ZR); +- +- bind(TAIL2); +- bne(tmp1, tmp2, DONE); +- +- bind(SHORT); +- xorr(tmp4, tmp3, tmp4); +- sll(tmp5, tmp4, tmp5); +- +- bind(IS_TMP5_ZR); +- bnez(tmp5, DONE); +- +- bind(SAME); +- mv(result, true); +- // That's it. +- bind(DONE); +- +- BLOCK_COMMENT("} array_equals"); +-} +- +-// Compare Strings +- +-// For Strings we're passed the address of the first characters in a1 +-// and a2 and the length in cnt1. +-// elem_size is the element size in bytes: either 1 or 2. +-// There are two implementations. For arrays >= 8 bytes, all +-// comparisons (including the final one, which may overlap) are +-// performed 8 bytes at a time. For strings < 8 bytes, we compare a +-// halfword, then a short, and then a byte. +- +-void C2_MacroAssembler::string_equals(Register a1, Register a2, +- Register result, Register cnt1, int elem_size) +-{ +- Label SAME, DONE, SHORT, NEXT_WORD; +- Register tmp1 = t0; +- Register tmp2 = t1; +- +- assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); +- assert_different_registers(a1, a2, result, cnt1, t0, t1); +- +- BLOCK_COMMENT("string_equals {"); +- +- mv(result, false); +- +- // Check for short strings, i.e. smaller than wordSize. +- sub(cnt1, cnt1, wordSize); +- bltz(cnt1, SHORT); +- +- // Main 8 byte comparison loop. +- bind(NEXT_WORD); { +- ld(tmp1, Address(a1, 0)); +- add(a1, a1, wordSize); +- ld(tmp2, Address(a2, 0)); +- add(a2, a2, wordSize); +- sub(cnt1, cnt1, wordSize); +- bne(tmp1, tmp2, DONE); +- } bgtz(cnt1, NEXT_WORD); +- +- // Last longword. In the case where length == 4 we compare the +- // same longword twice, but that's still faster than another +- // conditional branch. +- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when +- // length == 4. +- add(tmp1, a1, cnt1); +- ld(tmp1, Address(tmp1, 0)); +- add(tmp2, a2, cnt1); +- ld(tmp2, Address(tmp2, 0)); +- bne(tmp1, tmp2, DONE); +- j(SAME); +- +- bind(SHORT); +- Label TAIL03, TAIL01; +- +- // 0-7 bytes left. +- andi(t0, cnt1, 4); +- beqz(t0, TAIL03); +- { +- lwu(tmp1, Address(a1, 0)); +- add(a1, a1, 4); +- lwu(tmp2, Address(a2, 0)); +- add(a2, a2, 4); +- bne(tmp1, tmp2, DONE); +- } +- +- bind(TAIL03); +- // 0-3 bytes left. 
+- andi(t0, cnt1, 2); +- beqz(t0, TAIL01); +- { +- lhu(tmp1, Address(a1, 0)); +- add(a1, a1, 2); +- lhu(tmp2, Address(a2, 0)); +- add(a2, a2, 2); +- bne(tmp1, tmp2, DONE); +- } +- +- bind(TAIL01); +- if (elem_size == 1) { // Only needed when comparing 1-byte elements +- // 0-1 bytes left. +- andi(t0, cnt1, 1); +- beqz(t0, SAME); +- { +- lbu(tmp1, a1, 0); +- lbu(tmp2, a2, 0); +- bne(tmp1, tmp2, DONE); +- } +- } +- +- // Arrays are equal. +- bind(SAME); +- mv(result, true); +- +- // That's it. +- bind(DONE); +- BLOCK_COMMENT("} string_equals"); +-} +- +-typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +-typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, +- bool is_far, bool is_unordered); +- +-static conditional_branch_insn conditional_branches[] = +-{ +- /* SHORT branches */ +- (conditional_branch_insn)&Assembler::beq, +- (conditional_branch_insn)&Assembler::bgt, +- NULL, // BoolTest::overflow +- (conditional_branch_insn)&Assembler::blt, +- (conditional_branch_insn)&Assembler::bne, +- (conditional_branch_insn)&Assembler::ble, +- NULL, // BoolTest::no_overflow +- (conditional_branch_insn)&Assembler::bge, +- +- /* UNSIGNED branches */ +- (conditional_branch_insn)&Assembler::beq, +- (conditional_branch_insn)&Assembler::bgtu, +- NULL, +- (conditional_branch_insn)&Assembler::bltu, +- (conditional_branch_insn)&Assembler::bne, +- (conditional_branch_insn)&Assembler::bleu, +- NULL, +- (conditional_branch_insn)&Assembler::bgeu +-}; +- +-static float_conditional_branch_insn float_conditional_branches[] = +-{ +- /* FLOAT SHORT branches */ +- (float_conditional_branch_insn)&MacroAssembler::float_beq, +- (float_conditional_branch_insn)&MacroAssembler::float_bgt, +- NULL, // BoolTest::overflow +- (float_conditional_branch_insn)&MacroAssembler::float_blt, +- (float_conditional_branch_insn)&MacroAssembler::float_bne, +- (float_conditional_branch_insn)&MacroAssembler::float_ble, +- NULL, // BoolTest::no_overflow +- (float_conditional_branch_insn)&MacroAssembler::float_bge, +- +- /* DOUBLE SHORT branches */ +- (float_conditional_branch_insn)&MacroAssembler::double_beq, +- (float_conditional_branch_insn)&MacroAssembler::double_bgt, +- NULL, +- (float_conditional_branch_insn)&MacroAssembler::double_blt, +- (float_conditional_branch_insn)&MacroAssembler::double_bne, +- (float_conditional_branch_insn)&MacroAssembler::double_ble, +- NULL, +- (float_conditional_branch_insn)&MacroAssembler::double_bge +-}; +- +-void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { +- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), +- "invalid conditional branch index"); +- (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +-} +- +-// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use +-// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
+-void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { +- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), +- "invalid float conditional branch index"); +- int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); +- (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, +- (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +-} +- +-void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { +- switch (cmpFlag) { +- case BoolTest::eq: +- case BoolTest::le: +- beqz(op1, L, is_far); +- break; +- case BoolTest::ne: +- case BoolTest::gt: +- bnez(op1, L, is_far); +- break; +- default: +- ShouldNotReachHere(); +- } +-} +- +-void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { +- switch (cmpFlag) { +- case BoolTest::eq: +- beqz(op1, L, is_far); +- break; +- case BoolTest::ne: +- bnez(op1, L, is_far); +- break; +- default: +- ShouldNotReachHere(); +- } +-} +- +-void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { +- Label L; +- cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); +- mv(dst, src); +- bind(L); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, +- bool is_double, bool is_min) { +- assert_different_registers(dst, src1, src2); +- +- Label Done; +- fsflags(zr); +- if (is_double) { +- is_min ? fmin_d(dst, src1, src2) +- : fmax_d(dst, src1, src2); +- // Checking NaNs +- flt_d(zr, src1, src2); +- } else { +- is_min ? fmin_s(dst, src1, src2) +- : fmax_s(dst, src1, src2); +- // Checking NaNs +- flt_s(zr, src1, src2); +- } +- +- frflags(t0); +- beqz(t0, Done); +- +- // In case of NaNs +- is_double ? fadd_d(dst, src1, src2) +- : fadd_s(dst, src1, src2); +- +- bind(Done); +-} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +deleted file mode 100644 +index 90b6554af02..00000000000 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ /dev/null +@@ -1,141 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +-#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +- +-// C2_MacroAssembler contains high-level macros for C2 +- +- public: +- +- void string_compare(Register str1, Register str2, +- Register cnt1, Register cnt2, Register result, +- Register tmp1, Register tmp2, Register tmp3, +- int ae); +- +- void string_indexof_char_short(Register str1, Register cnt1, +- Register ch, Register result, +- bool isL); +- +- void string_indexof_char(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- bool isL); +- +- void string_indexof(Register str1, Register str2, +- Register cnt1, Register cnt2, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, int ae); +- +- void string_indexof_linearscan(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- int needle_con_cnt, Register result, int ae); +- +- void arrays_equals(Register r1, Register r2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, Register cnt1, +- int elem_size); +- +- void string_equals(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- // refer to conditional_branches and float_conditional_branches +- static const int bool_test_bits = 3; +- static const int neg_cond_bits = 2; +- static const int unsigned_branch_mask = 1 << bool_test_bits; +- static const int double_branch_mask = 1 << bool_test_bits; +- +- // cmp +- void cmp_branch(int cmpFlag, +- Register op1, Register op2, +- Label& label, bool is_far = false); +- +- void float_cmp_branch(int cmpFlag, +- FloatRegister op1, FloatRegister op2, +- Label& label, bool is_far = false); +- +- void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, +- Label& L, bool is_far = false); +- +- void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, +- Label& L, bool is_far = false); +- +- void enc_cmove(int cmpFlag, +- Register op1, Register op2, +- Register dst, Register src); +- +- void spill(Register r, bool is64, int offset) { +- is64 ? sd(r, Address(sp, offset)) +- : sw(r, Address(sp, offset)); +- } +- +- void spill(FloatRegister f, bool is64, int offset) { +- is64 ? fsd(f, Address(sp, offset)) +- : fsw(f, Address(sp, offset)); +- } +- +- void spill(VectorRegister v, int offset) { +- add(t0, sp, offset); +- vs1r_v(v, t0); +- } +- +- void unspill(Register r, bool is64, int offset) { +- is64 ? ld(r, Address(sp, offset)) +- : lw(r, Address(sp, offset)); +- } +- +- void unspillu(Register r, bool is64, int offset) { +- is64 ? ld(r, Address(sp, offset)) +- : lwu(r, Address(sp, offset)); +- } +- +- void unspill(FloatRegister f, bool is64, int offset) { +- is64 ? 
fld(f, Address(sp, offset)) +- : flw(f, Address(sp, offset)); +- } +- +- void unspill(VectorRegister v, int offset) { +- add(t0, sp, offset); +- vl1r_v(v, t0); +- } +- +- void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { +- assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); +- unspill(v0, src_offset); +- spill(v0, dst_offset); +- } +- +- void minmax_FD(FloatRegister dst, +- FloatRegister src1, FloatRegister src2, +- bool is_double, bool is_min); +- +-#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 304b6f2b06c..d175a62aeeb 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -4125,3 +4125,1285 @@ void MacroAssembler::safepoint_ifence() { + ifence(); + } + ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; ++ ++ BLOCK_COMMENT("string_indexof_char_short {"); ++ ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ ++ mv(result, -1); ++ mv(index, zr); ++ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); ++ ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); ++ ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); ++ ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); ++ ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); ++ ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); ++ ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); ++ ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); ++ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); ++ ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); ++ ++ bind(MATCH7); ++ addi(index, index, 7); ++ ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); ++} ++ ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; ++ ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } ++ ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); ++ ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } ++ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? 
mask7f7f : mask7fff); ++ ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); ++ ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } ++ ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); ++} ++ ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++ ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_indexof {"); ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. 
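Before the Boyer-Moore comment continues below, a note on the CH1_LOOP of string_indexof_char above: it relies on the classic SWAR "zero byte" test, with mask1/mask2 playing the roles of the per-lane ones mask and (complemented) high-bit mask. A minimal scalar sketch, assuming 8-bit Latin1 lanes and a hypothetical helper name that is not part of the patch:

#include <cstdint>

// Returns a mask with the high bit set in every byte lane of 'word' that equals 'ch'.
static inline uint64_t swar_has_char(uint64_t word, uint8_t ch) {
  const uint64_t ones  = 0x0101010101010101ULL;   // mask1 above
  const uint64_t highs = 0x8080808080808080ULL;   // complement of mask2 above
  uint64_t x = word ^ (ones * ch);                // matching lanes become zero
  return (x - ones) & ~x & highs;                 // classic "has zero byte" test
}
// __builtin_ctzll(mask) >> 3 then yields the index of the first matching byte,
// mirroring ctzc_bit followed by the srli-by-3 in the HIT path above.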
++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 
7 : 3; ++ ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ ++ sub(sp, sp, ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); ++ ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. 
Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} ++ ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ Register first = tmp3; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); ++ ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); ++ ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); ++ ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, 
MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); ++ ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; ++ ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); ++} ++ ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; ++ ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); ++ } ++ ++ // Compute the minimum of the string lengths and save the difference in result. 
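The comparison below follows the usual compareTo contract: compare the first min(cnt1, cnt2) characters, let the first difference decide, and otherwise fall back to the length difference saved in result. A scalar reference for the UU case (illustrative only; ref_string_compare is not part of the patch, and the L/U variants differ only in element width):

#include <cstdint>

static int ref_string_compare(const uint16_t* s1, int cnt1,
                              const uint16_t* s2, int cnt2) {
  int n = cnt1 < cnt2 ? cnt1 : cnt2;   // counts are already in characters here
  for (int i = 0; i < n; i++) {
    if (s1[i] != s2[i]) {
      return (int)s1[i] - (int)s2[i];  // corresponds to the DIFFERENCE path
    }
  }
  return cnt1 - cnt2;                  // all compared characters equal
}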
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); ++ ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); ++ ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); ++ ++ // Find the first different characters in the longwords and ++ // compute their difference. 
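The DIFFERENCE block below locates that character with a count-trailing-zeros on the XOR of the two longwords, rounded down to an element boundary, then extracts and subtracts the two elements. A scalar sketch (illustrative; it assumes the little-endian loads RISC-V performs, so the lowest-addressed character sits in the least-significant bits):

#include <cstdint>

// Precondition: a != b (the caller only branches here when the XOR is non-zero).
static int first_char_difference(uint64_t a, uint64_t b, bool latin1) {
  uint64_t x = a ^ b;
  int bit = __builtin_ctzll(x);        // lowest differing bit
  int elem_bits = latin1 ? 8 : 16;
  bit -= bit % elem_bits;              // round down to an element boundary
  uint64_t mask = latin1 ? 0xFFu : 0xFFFFu;
  return (int)((a >> bit) & mask) - (int)((b >> bit) & mask);
}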
++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } ++ ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); ++ ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} string_compare"); ++} ++ ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++ ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); ++ ++ BLOCK_COMMENT("arrays_equals {"); ++ ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); ++ ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); ++ ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same ++ ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); ++ ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); ++ ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); ++} ++ ++// Compare Strings ++ ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. ++ ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ ++ BLOCK_COMMENT("string_equals {"); ++ ++ mv(result, false); ++ ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); ++ ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL03); ++ // 0-3 bytes left. 
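For short strings, the tail here tests individual bits of the remaining byte count: bit 2 selected the 4-byte compare above, bit 1 selects a 2-byte compare below, and bit 0 a final single byte (Latin1 only). Because cnt1 was pre-decremented by wordSize, its low three bits still equal the number of leftover bytes. A scalar sketch of the same decomposition (illustrative; ref_tail_equal is not part of the patch):

#include <cstdint>
#include <cstring>

static bool ref_tail_equal(const uint8_t* a, const uint8_t* b, unsigned rem /* 0..7 */) {
  if (rem & 4) { if (memcmp(a, b, 4) != 0) return false; a += 4; b += 4; }
  if (rem & 2) { if (memcmp(a, b, 2) != 0) return false; a += 2; b += 2; }
  if (rem & 1) { if (*a != *b)             return false; }
  return true;
}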
++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); ++ } ++ } ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); ++} ++ ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} ++ ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
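float_cmp_branch below, like cmp_branch above, indexes these tables with a composed flag: the low three bits are the BoolTest condition (in the order implied by the table entries and their overflow/no_overflow comments: eq, gt, overflow, lt, ne, le, no_overflow, ge), and bit 3 — unsigned_branch_mask for the integer table, double_branch_mask for the float table — selects the second half. enc_cmove negates a condition by XOR-ing bit 2 (1 << neg_cond_bits), e.g. eq <-> ne, lt <-> ge. A standalone illustration of that indexing (the names here are illustrative, not HotSpot's):

#include <cstdio>

enum Cond { EQ = 0, GT = 1, /* overflow = 2 */ LT = 3, NE = 4, LE = 5, /* no_overflow = 6 */ GE = 7 };

static const char* const branch_name[] = {
  "beq", "bgt", nullptr, "blt", "bne", "ble", nullptr, "bge",      // signed half
  "beq", "bgtu", nullptr, "bltu", "bne", "bleu", nullptr, "bgeu"   // unsigned half
};

int main() {
  const int unsigned_mask = 1 << 3;                 // unsigned_branch_mask
  const int negate        = 1 << 2;                 // 1 << neg_cond_bits
  printf("%s\n", branch_name[LT | unsigned_mask]);  // prints "bltu"
  printf("%s\n", branch_name[GE ^ negate]);         // negated ge -> lt, prints "blt"
  return 0;
}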
++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} ++ ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } ++ ++ frflags(t0); ++ beqz(t0, Done); ++ ++ // In case of NaNs ++ is_double ? 
fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); ++ ++ bind(Done); ++} ++ ++#endif // COMPILER2 ++ +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c6b71bdbc3c..2ef28771e2e 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -851,6 +851,109 @@ class MacroAssembler: public Assembler { + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); ++ ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); ++ ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); ++ ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+ // Return address: -+ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } -+ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+ // return address of param, zero origin index. -+ public Address getNativeParamAddr(int idx) { -+ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); ++ ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); ++ ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); + } + -+ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } + -+ public Address addressOfInterpreterFrameLocals() { -+ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } ++ ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } ++ ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); + } + -+ private Address addressOfInterpreterFrameBCX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); -+ } ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } ++ ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); ++ + }; + + #ifdef ASSERT +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 13546ab328b..2e7eed8fb52 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -997,7 +997,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + #endif + + void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); + } +@@ -1015,7 +1015,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + #endif + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); +@@ -1074,7 +1074,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // n.b. 
frame size includes space for return pc and fp + const int framesize = C->output()->frame_size_in_bytes(); +@@ -1150,7 +1150,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); + int framesize = C->output()->frame_size_in_bytes(); + +@@ -1251,7 +1251,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + int dst_offset = ra_->reg2offset(dst_lo); + + if (cbuf != NULL) { +- C2_MacroAssembler _masm(cbuf); ++ MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: +@@ -1371,7 +1371,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + #endif + + void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + assert_cond(ra_ != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); +@@ -1422,7 +1422,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const + void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const + { + // This is the unverified entry point. +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + Label skip; + __ cmp_klass(j_rarg0, t1, t0, skip); +@@ -1449,7 +1449,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) + // j #exception_blob_entry_point + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); +@@ -1467,7 +1467,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) + { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. 
+- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); +@@ -1848,7 +1848,7 @@ encode %{ + // BEGIN Non-volatile memory access + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); +@@ -1856,7 +1856,7 @@ encode %{ + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { +@@ -1875,7 +1875,7 @@ encode %{ + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); + __ li(dst_reg, 1); +@@ -1893,12 +1893,12 @@ encode %{ + %} + + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { +@@ -1911,13 +1911,13 @@ encode %{ + %} + + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mv(dst_reg, zr); + %} + + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { +@@ -1930,42 +1930,42 @@ encode %{ + %} + + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, 
memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); +@@ -1974,13 +1974,13 @@ encode %{ + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + __ j(*L); + %} + + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + switch ($cmp$$cmpcode) { + case(BoolTest::ge): +@@ -2004,7 +2004,7 @@ encode %{ + + Label miss; + Label done; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss); + if ($primary) { +@@ -2023,7 +2023,7 @@ encode %{ + %} + + enc_class riscv_enc_java_static_call(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + address addr = (address)$meth$$method; + address call = NULL; +@@ -2055,7 +2055,7 @@ encode %{ + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + int method_index = resolved_method_index(cbuf); + address call = __ ic_call((address)$meth$$method, method_index); + if (call == NULL) { +@@ -2065,7 +2065,7 @@ encode %{ + %} + + enc_class riscv_enc_call_epilog() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); +@@ -2073,7 +2073,7 @@ encode %{ + %} + + enc_class riscv_enc_java_to_runtime(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a jr (they +@@ -2102,7 +2102,7 @@ encode %{ + + // using the cr register as the bool result: 0 for success; others failed. + enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); +@@ -2189,7 +2189,7 @@ encode %{ + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. 
+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); +@@ -2262,7 +2262,7 @@ encode %{ + // arithmetic encodings + + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2270,7 +2270,7 @@ encode %{ + %} + + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2278,7 +2278,7 @@ encode %{ + %} + + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2286,7 +2286,7 @@ encode %{ + %} + + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2294,14 +2294,14 @@ encode %{ + %} + + enc_class riscv_enc_tail_call(iRegP jump_target) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + __ jr(target_reg); + %} + + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in x10 +@@ -2312,12 +2312,12 @@ encode %{ + %} + + enc_class riscv_enc_rethrow() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class riscv_enc_ret() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ret(); + %} +@@ -8506,7 +8506,7 @@ instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8526,7 +8526,7 @@ instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8585,7 +8585,7 @@ instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8604,7 +8604,7 @@ instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8625,7 +8625,7 @@ instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8645,7 +8645,7 @@ instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8666,7 +8666,7 @@ instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8686,7 +8686,7 @@ instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8741,7 +8741,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + +@@ -8759,7 +8759,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + +@@ -9080,7 +9080,7 @@ instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9095,7 +9095,7 @@ instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, 
$lbl\t#@far_cmpU_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9138,7 +9138,7 @@ instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9153,7 +9153,7 @@ instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9171,7 +9171,7 @@ instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9189,7 +9189,7 @@ instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9207,7 +9207,7 @@ instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9225,7 +9225,7 @@ instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9276,7 +9276,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9292,7 +9292,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} + + ins_encode %{ +- __ 
float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9616,7 +9616,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} +@@ -9673,7 +9673,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} +@@ -9691,7 +9691,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + +From 115cd21290080b157d0ca8b7080e66ebd814fbdb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:15:18 +0800 +Subject: [PATCH 091/140] Revert JDK-8222297: IRT_ENTRY/IRT_LEAF etc are the + same as JRT && JDK-8263709: Cleanup THREAD/TRAPS/CHECK usage in JRT_ENTRY + routines + +--- + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +index d93530d8564..776b0787238 100644 +--- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -278,12 +278,12 @@ class SlowSignatureHandler + }; + + +-JRT_ENTRY(address, +- InterpreterRuntime::slow_signature_handler(JavaThread* current, ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) +- methodHandle m(current, (Method*)method); ++ methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments +@@ -292,4 +292,4 @@ JRT_ENTRY(address, + + // return result handler + return Interpreter::result_handler(m->result_type()); +-JRT_END ++IRT_END + +From 6cbf43d5f095aef93ef0bf595f51019a03cc1989 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:20:06 +0800 +Subject: [PATCH 092/140] Revert JDK-8245289: Clean up offset code in + JavaClasses + +--- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 18 +++++++++--------- + .../templateInterpreterGenerator_riscv.cpp | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 4442b5991b1..e070b8096a6 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -53,7 +53,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_ + verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), + "MH argument is a Class"); + } +- __ ld(klass_reg, Address(klass_reg, 
java_lang_Class::klass_offset())); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); + } + + #ifdef ASSERT +@@ -140,13 +140,13 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); +- __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack +@@ -284,10 +284,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + "MemberName required for invokeVirtual etc."); + } + +- Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); +- Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); +- Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); +- Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 8aea4eca048..ce6166030b4 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -894,7 +894,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + + address entry = __ pc(); + +- const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; + +From 8c9b9f4246f4ede3c31f59749f9d4bc625f106b3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:30:35 +0800 +Subject: [PATCH 093/140] Revert JDK-8242629: Remove references to deprecated + java.util.Observer and Observable + +--- + 
.../runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java | 2 -- + .../classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java | 2 -- + .../sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java | 2 -- + 3 files changed, 6 deletions(-) + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +index f2e224f28ee..5c2b6e0e3ea 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -34,8 +34,6 @@ + import sun.jvm.hotspot.runtime.riscv64.*; + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +index df280005d72..e372bc5f7be 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -34,8 +34,6 @@ + import sun.jvm.hotspot.runtime.*; + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + /** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. 
*/ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +index d0ad2b559a6..850758a7ed4 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -31,8 +31,6 @@ + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.runtime.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + +From 43f2a4fec6b4922fa8c187deda310ad636aeed2e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:33:56 +0800 +Subject: [PATCH 094/140] Revert JDK-8256155: Allow multiple large page sizes + to be used on Linux + +--- + src/hotspot/os/linux/os_linux.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 6f75e623a9a..7fc9588301b 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -4078,7 +4078,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) +- SPARC_ONLY(4 * M); ++ SPARC_ONLY(4 * M) ++ RISCV64_ONLY(2 * M); + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); + +From a93191be0155882a0f4d92bba4de9fdf4f508a4a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:38:53 +0800 +Subject: [PATCH 095/140] Revert JDK-8252204: AArch64: Implement SHA3 + accelerator/intrinsic + +--- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index d4b79162d84..50ee7edb708 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -82,11 +82,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + +- if (UseSHA3Intrinsics) { +- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); +- } +- + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + +From 29acd4f1bb99e856418f7d9d3da4f205812b1663 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:43:23 +0800 +Subject: [PATCH 096/140] Revert JDK-8253717: Relocate stack overflow code out + of thread.hpp/cpp && JDK-8255766: Fix linux+arm64 build after 8254072 + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index d175a62aeeb..d94074b4a3c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1666,7 +1666,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { + // was post-decremented.) 
Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. +- for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + sub(tmp, tmp, os::vm_page_size()); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index ae414224c5b..dc3ac548d73 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1252,7 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + __ nop(); + + // Generate stack overflow check +- __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + + // Generate a new frame for the wrapper. + __ enter(); +@@ -1551,7 +1551,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + Label reguard; + Label reguard_done; + __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); +- __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); + __ beq(t0, t1, reguard); + __ bind(reguard_done); + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index ce6166030b4..e639fa7e12f 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1248,7 +1248,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); +- __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + + __ pusha(); // only save smashed registers + +From 6fa17c662dd2488108809e77dcff921bb475813c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:50:51 +0800 +Subject: [PATCH 097/140] Revert JDK-8258459: Decouple gc_globals.hpp from + globals.hpp + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 1f4409a9c9a..84b1afc7dc6 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -28,7 +28,6 @@ + #include "asm/macroAssembler.inline.hpp" + #include "gc/shared/barrierSetAssembler.hpp" + #include "gc/shared/collectedHeap.hpp" +-#include "gc/shared/tlab_globals.hpp" + #include "interpreter/interp_masm.hpp" + #include "interpreter/interpreter.hpp" + #include "interpreter/interpreterRuntime.hpp" + +From bcc26e749ccc20db5a4ba51c2cf8740a908a8a74 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:56:58 +0800 +Subject: [PATCH 098/140] Revert JDK-8223136: Move compressed oops functions to + CompressedOops class + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 64 +++++++++---------- + .../cpu/riscv/macroAssembler_riscv.hpp | 1 - + src/hotspot/cpu/riscv/riscv.ad | 10 +-- + 3 
files changed, 37 insertions(+), 38 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index d94074b4a3c..becc1656358 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1318,10 +1318,10 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { + void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { +- mv(xheapbase, CompressedOops::ptrs_base()); ++ mv(xheapbase, Universe::narrow_ptrs_base()); + } else { + int32_t offset = 0; +- la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); + ld(xheapbase, Address(xheapbase, offset)); + } + } +@@ -1596,8 +1596,8 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R + void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { + if (UseCompressedClassPointers) { + lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); +- if (CompressedKlassPointers::base() == NULL) { +- slli(tmp, tmp, CompressedKlassPointers::shift()); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); + beq(trial_klass, tmp, L); + return; + } +@@ -1745,9 +1745,9 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + // Algorithm must match CompressedOops::encode. + void MacroAssembler::encode_heap_oop(Register d, Register s) { + verify_oop(s, "broken oop in encode_heap_oop"); +- if (CompressedOops::base() == NULL) { +- if (CompressedOops::shift() != 0) { +- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli(d, s, LogMinObjAlignmentInBytes); + } else { + mv(d, s); +@@ -1758,9 +1758,9 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { + bgez(d, notNull); + mv(d, zr); + bind(notNull); +- if (CompressedOops::shift() != 0) { +- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); +- srli(d, d, CompressedOops::shift()); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); + } + } + } +@@ -1799,9 +1799,9 @@ void MacroAssembler::decode_klass_not_null(Register r) { + void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + +- if (CompressedKlassPointers::base() == NULL) { +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + slli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); +@@ -1815,10 +1815,10 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register + } + + assert_different_registers(src, xbase); +- li(xbase, (uintptr_t)CompressedKlassPointers::base()); ++ li(xbase, 
(uintptr_t)Universe::narrow_klass_base()); + +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); + } else { +@@ -1835,9 +1835,9 @@ void MacroAssembler::encode_klass_not_null(Register r) { + void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + +- if (CompressedKlassPointers::base() == NULL) { +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); +@@ -1845,8 +1845,8 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register + return; + } + +- if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && +- CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { + zero_extend(dst, src, 32); + return; + } +@@ -1857,10 +1857,10 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register + } + + assert_different_registers(src, xbase); +- li(xbase, (intptr_t)CompressedKlassPointers::base()); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); + sub(dst, src, xbase); +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli(dst, dst, LogKlassAlignmentInBytes); + } + if (xbase == xheapbase) { +@@ -1878,22 +1878,22 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+- if (CompressedOops::shift() != 0) { +- assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + slli(dst, src, LogMinObjAlignmentInBytes); +- if (CompressedOops::base() != NULL) { ++ if (Universe::narrow_oop_base() != NULL) { + add(dst, xheapbase, dst); + } + } else { +- assert(CompressedOops::base() == NULL, "sanity"); ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); + mv(dst, src); + } + } + + void MacroAssembler::decode_heap_oop(Register d, Register s) { +- if (CompressedOops::base() == NULL) { +- if (CompressedOops::shift() != 0 || d != s) { +- slli(d, s, CompressedOops::shift()); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); + } + } else { + Label done; +@@ -3004,7 +3004,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + code_section()->relocate(inst_mark(), rspec); +- narrowKlass nk = CompressedKlassPointers::encode(k); ++ narrowKlass nk = Klass::encode_klass(k); + li32(dst, nk); + zero_extend(dst, dst, 32); + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 2ef28771e2e..953bca3cbd8 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -29,7 +29,6 @@ + + #include "asm/assembler.hpp" + #include "metaprogramming/enableIf.hpp" +-#include "oops/compressedOops.hpp" + + // MacroAssembler extends Assembler by frequently used macros. + // +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2e7eed8fb52..24214964243 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1407,7 +1407,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); +- if (CompressedKlassPointers::shift() != 0) { ++ if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { +@@ -3255,7 +3255,7 @@ operand indOffL(iRegP reg, immLOffset off) + + operand indirectN(iRegN reg) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); +@@ -3270,7 +3270,7 @@ operand indirectN(iRegN reg) + + operand indOffIN(iRegN reg, immIOffset off) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); +@@ -3285,7 +3285,7 @@ operand indOffIN(iRegN reg, immIOffset off) + + operand indOffLN(iRegN reg, immLOffset off) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); +@@ -7947,7 +7947,7 @@ instruct convP2I(iRegINoSp dst, iRegP src) %{ + // in case of 32bit oops (heap < 4Gb). 
+ instruct convN2I(iRegINoSp dst, iRegN src) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); + +From 81d8ea9077484f1dd20033390cbd3c1844b1b966 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:11:20 +0800 +Subject: [PATCH 099/140] Revert JDK-8247912: Make narrowOop a scoped enum + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index becc1656358..e2841c28c37 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1305,7 +1305,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { + // instruction. + if (NativeInstruction::is_li32_at(insn_addr)) { + // Move narrow OOP +- uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); ++ narrowOop n = CompressedOops::encode((oop)o); + return patch_imm_in_li32(insn_addr, (int32_t)n); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { + // Move wide OOP + +From f980e03cb17804ff72958dd13505058048c04da8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:20:05 +0800 +Subject: [PATCH 100/140] Revert JDK-8260467: Move well-known classes from + systemDictionary.hpp to vmClasses.hpp + +--- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 11 +++++------ + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 4 ++-- + 2 files changed, 7 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index e070b8096a6..fd907f77afb 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -27,7 +27,6 @@ + #include "precompiled.hpp" + #include "asm/macroAssembler.hpp" + #include "classfile/javaClasses.inline.hpp" +-#include "classfile/vmClasses.hpp" + #include "interpreter/interpreter.hpp" + #include "interpreter/interpreterRuntime.hpp" + #include "memory/allocation.inline.hpp" +@@ -50,7 +49,7 @@ + void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + assert_cond(_masm != NULL); + if (VerifyMethodHandles) { +- verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + } + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +@@ -68,11 +67,11 @@ static int check_nonzero(const char* xname, int x) { + + #ifdef ASSERT + void MethodHandles::verify_klass(MacroAssembler* _masm, +- Register obj, vmClassID klass_id, ++ Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { + assert_cond(_masm != NULL); +- InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); +- Klass* klass = vmClasses::klass_at(klass_id); ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); + Register temp = t1; + Register temp2 = t0; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; +@@ -280,7 +279,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + // The method is a member invoker used by direct method handles. 
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) +- verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +index f73aba29d67..65493eba764 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -36,11 +36,11 @@ enum /* platform_dependent_constants */ { + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, +- Register obj, vmClassID klass_id, ++ Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { +- verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + +From 2c68b064100b5abaca80926e213280ea82ff161a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:32:15 +0800 +Subject: [PATCH 101/140] Revert JDK-8268858: Determine register pressure + automatically by the number of available registers for allocation + +--- + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/riscv.ad | 27 ---------------------- + 2 files changed, 2 insertions(+), 27 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 6c301cdae04..33d78fb2f6f 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -44,8 +44,10 @@ define_pd_global(intx, CompileThreshold, 10000); + + define_pd_global(intx, OnStackReplacePercentage, 140); + define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 32); + define_pd_global(intx, FreqInlineSize, 325); + define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); + define_pd_global(intx, InteriorEntryAlignment, 16); + define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); + define_pd_global(intx, LoopUnrollLimit, 60); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 24214964243..c5e0ae23029 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1719,33 +1719,6 @@ bool Matcher::is_spillable_arg(int reg) + return can_be_java_arg(reg); + } + +-uint Matcher::int_pressure_limit() +-{ +- // A derived pointer is live at CallNode and then is flagged by RA +- // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip +- // derived pointers and lastly fail to spill after reaching maximum +- // number of iterations. Lowering the default pressure threshold to +- // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become +- // a high register pressure area of the code so that split_DEF can +- // generate DefinitionSpillCopy for the derived pointer. 
+- uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; +- if (!PreserveFramePointer) { +- // When PreserveFramePointer is off, frame pointer is allocatable, +- // but different from other SOC registers, it is excluded from +- // fatproj's mask because its save type is No-Save. Decrease 1 to +- // ensure high pressure at fatproj when PreserveFramePointer is off. +- // See check_pressure_at_fatproj(). +- default_int_pressure_threshold--; +- } +- return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; +-} +- +-uint Matcher::float_pressure_limit() +-{ +- // _FLOAT_REG_mask is generated by adlc from the float_reg register class. +- return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; +-} +- + bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; + } + +From 932ebd6238ea7703dc3164e4506af332f6847592 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:51:12 +0800 +Subject: [PATCH 102/140] Revert JDK-8276563: Undefined Behaviour in class + Assembler && 8257882: Implement linkToNative intrinsic on AArch64 (the + register part) + +--- + .../cpu/riscv/globalDefinitions_riscv.hpp | 2 - + src/hotspot/cpu/riscv/register_riscv.cpp | 4 - + src/hotspot/cpu/riscv/register_riscv.hpp | 123 +++++++++++++----- + 3 files changed, 91 insertions(+), 38 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index 2936837d951..ffd420da024 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -47,6 +47,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + +-#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY +- + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +index 96cf1996a83..ef60cb3bb05 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.cpp ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -26,10 +26,6 @@ + #include "precompiled.hpp" + #include "register_riscv.hpp" + +-REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); +-REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); +-REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); +- + const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; + +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +index d697751f55f..f64a06eb89f 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.hpp ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -47,13 +47,13 @@ typedef VMRegImpl* VMReg; + + // Use Register as shortcut + class RegisterImpl; +-typedef const RegisterImpl* Register; ++typedef RegisterImpl* Register; + +-inline constexpr Register as_Register(int encoding); ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} + + class RegisterImpl: public AbstractRegisterImpl { +- static constexpr Register first(); +- + public: + enum { + number_of_registers = 32, +@@ -66,16 +66,16 @@ class RegisterImpl: public AbstractRegisterImpl { + }; + + // derived registers, offsets, and addresses +- const Register successor() const { return this + 1; } ++ const Register successor() const { return 
as_Register(encoding() + 1); } + + // construction +- inline friend constexpr Register as_Register(int encoding); ++ inline friend Register as_Register(int encoding); + + VMReg as_VMReg() const; + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +@@ -93,9 +93,11 @@ class RegisterImpl: public AbstractRegisterImpl { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } +-}; + +-REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; + + // The integer registers of the RISCV architecture + +@@ -136,14 +138,14 @@ CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + + // Use FloatRegister as shortcut + class FloatRegisterImpl; +-typedef const FloatRegisterImpl* FloatRegister; ++typedef FloatRegisterImpl* FloatRegister; + +-inline constexpr FloatRegister as_FloatRegister(int encoding); ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} + + // The implementation of floating point registers for the architecture + class FloatRegisterImpl: public AbstractRegisterImpl { +- static constexpr FloatRegister first(); +- + public: + enum { + number_of_registers = 32, +@@ -155,18 +157,16 @@ class FloatRegisterImpl: public AbstractRegisterImpl { + }; + + // construction +- inline friend constexpr FloatRegister as_FloatRegister(int encoding); ++ inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg() const; + + // derived registers, offsets, and addresses +- FloatRegister successor() const { +- return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); +- } ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +@@ -186,8 +186,6 @@ class FloatRegisterImpl: public AbstractRegisterImpl { + } + }; + +-REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); +- + // The float registers of the RISCV architecture + + CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); +@@ -227,14 +225,14 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + + // Use VectorRegister as shortcut + class VectorRegisterImpl; +-typedef const VectorRegisterImpl* VectorRegister; ++typedef VectorRegisterImpl* VectorRegister; + +-inline constexpr VectorRegister as_VectorRegister(int encoding); ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; ++} + + // The implementation of vector registers for RVV + class VectorRegisterImpl: public AbstractRegisterImpl { +- static constexpr VectorRegister first(); +- + public: + enum { + number_of_registers = 32, +@@ -242,23 
+240,21 @@ class VectorRegisterImpl: public AbstractRegisterImpl { + }; + + // construction +- inline friend constexpr VectorRegister as_VectorRegister(int encoding); ++ inline friend VectorRegister as_VectorRegister(int encoding); + + VMReg as_VMReg() const; + + // derived registers, offsets, and addresses +- VectorRegister successor() const { return this + 1; } ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + + }; + +-REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); +- + // The vector registers of RVV + CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + +@@ -315,8 +311,71 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + static const int max_fpr; + }; + +-typedef AbstractRegSet RegSet; +-typedef AbstractRegSet FloatRegSet; +-typedef AbstractRegSet VectorRegSet; ++// A set of registers ++class RegSet { ++ uint32_t _bitset; ++ ++ RegSet(uint32_t bitset) : _bitset(bitset) { } + -+ public int getInterpreterFrameBCI() { -+ // FIXME: this is not atomic with respect to GC and is unsuitable -+ // for use in a non-debugging, or reflective, system. Need to -+ // figure out how to express this. -+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); -+ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); -+ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); -+ return bcpToBci(bcp, method); -+ } ++public: + -+ public Address addressOfInterpreterFrameMDX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); -+ } ++ RegSet() : _bitset(0) { } + -+ // expression stack -+ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ RegSet(Register r1) : _bitset(r1->bit()) { } + -+ public Address addressOfInterpreterFrameExpressionStack() { -+ Address monitorEnd = interpreterFrameMonitorEnd().address(); -+ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; + } + -+ public int getInterpreterFrameExpressionStackDirection() { return -1; } -+ -+ // top of expression stack -+ public Address addressOfInterpreterFrameTOS() { -+ return getSP(); ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; + } + -+ /** Expression stack from top down */ -+ public Address addressOfInterpreterFrameTOSAt(int slot) { -+ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; + } + -+ public Address getInterpreterFrameSenderSP() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "interpreted frame expected"); -+ } -+ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; + } + -+ // Monitors -+ public BasicObjectLock interpreterFrameMonitorBegin() { -+ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ static RegSet of(Register r1) { ++ return 
RegSet(r1); + } + -+ public BasicObjectLock interpreterFrameMonitorEnd() { -+ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); -+ if (Assert.ASSERTS_ENABLED) { -+ // make sure the pointer points inside the frame -+ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); -+ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); -+ } -+ return new BasicObjectLock(result); ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; + } + -+ public int interpreterFrameMonitorSize() { -+ return BasicObjectLock.size(); ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; + } + -+ // Method -+ public Address addressOfInterpreterFrameMethod() { -+ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; + } + -+ // Constant pool cache -+ public Address addressOfInterpreterFrameCPCache() { -+ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); -+ } ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + -+ // Entry frames -+ public JavaCallWrapper getEntryFrameCallWrapper() { -+ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ return RegSet(bits); + } + -+ protected Address addressOfSavedOopResult() { -+ // offset is 2 for compiler2 and 3 for compiler1 -+ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * -+ VM.getVM().getAddressSize()); -+ } ++ uint32_t bits() const { return _bitset; } + -+ protected Address addressOfSavedReceiver() { -+ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ } ++private: + -+ private void dumpStack() { -+ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ AddressOps.lt(addr, getSP()); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ System.out.println("-----------------------"); -+ for (Address addr = getSP(); -+ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? 
as_Register(exact_log2(first)) : noreg; + } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java ++}; + + #endif // CPU_RISCV_REGISTER_RISCV_HPP + +From 9c85aa8d3387d795f9c2f4795ffc7f9d7f814d92 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:24:49 +0800 +Subject: [PATCH 103/140] Revert JDK-8240363: Refactor Compile::Output() to its + own Phase + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 20 +++++++++---------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e2841c28c37..656334f326b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3027,7 +3027,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && +- Compile::current()->output()->in_scratch_emit_size()); ++ Compile::current()->in_scratch_emit_size()); + #endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c5e0ae23029..d736750d02d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1029,7 +1029,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + //============================================================================= + const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + +-int ConstantTable::calculate_table_base_offset() const { ++int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset + } + +@@ -1058,9 +1058,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + +- int framesize = C->output()->frame_slots() << LogBytesPerInt; ++ int framesize = C->frame_slots() << LogBytesPerInt; + +- if (C->output()->need_stack_bang(framesize)) { ++ if (C->need_stack_bang(framesize)) { + st->print("# stack bang size=%d\n\t", framesize); + } + +@@ -1077,7 +1077,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + + // n.b. 
frame size includes space for return pc and fp +- const int framesize = C->output()->frame_size_in_bytes(); ++ const int framesize = C->frame_size_in_bytes(); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later +@@ -1085,8 +1085,8 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + assert_cond(C != NULL); + +- int bangsize = C->output()->bang_size_in_bytes(); +- if (C->output()->need_stack_bang(bangsize)) { ++ int bangsize = C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + +@@ -1096,12 +1096,12 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Unimplemented(); + } + +- C->output()->set_frame_complete(cbuf.insts_size()); ++ C->set_frame_complete(cbuf.insts_size()); + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. +- ConstantTable& constant_table = C->output()->constant_table(); ++ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } + } +@@ -1125,7 +1125,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + assert_cond(C != NULL); +- int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + st->print("# pop frame %d\n\t", framesize); + +@@ -1152,7 +1152,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); +- int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + __ remove_frame(framesize); + + +From 3a58114310a56ebca04ba44b4883d205096eb844 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:36:09 +0800 +Subject: [PATCH 104/140] Revert RotateLeft && RotateRight matching rules + +--- + src/hotspot/cpu/riscv/riscv.ad | 2 - + src/hotspot/cpu/riscv/riscv_b.ad | 76 -------------------------------- + 2 files changed, 78 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index d736750d02d..1e6495692da 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1505,8 +1505,6 @@ const bool Matcher::match_rule_supported(int opcode) { + case Op_PopCountL: + return UsePopCountInstruction; + +- case Op_RotateRight: +- case Op_RotateLeft: + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +index 4488c1c4031..b9e04c432e1 100644 +--- a/src/hotspot/cpu/riscv/riscv_b.ad ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -25,82 +25,6 @@ + + // RISCV Bit-Manipulation Extension Architecture Description File + +-instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} +- +- ins_cost(ALU_COST); +- ins_encode %{ +- __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); +- %} +- +- ins_pipe(ialu_reg_shift); +-%} +- +-instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "rori $dst, $src, 
($shift & 0x3f)\t#@rorL_imm_rvb" %} +- +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); +- %} +- +- ins_pipe(ialu_reg_shift); +-%} +- +-instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateLeft src shift)); +- +- format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateLeft src shift)); +- +- format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- + // Convert oop into int for vectors alignment masking + instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ + predicate(UseRVB); + +From 21577388eda0218eeb4b28bc71ecf5737d40639e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:49:28 +0800 +Subject: [PATCH 105/140] Revert JDK-8230565: ZGC: Redesign C2 load barrier to + expand on the MachNode level + +--- + src/hotspot/cpu/riscv/riscv.ad | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 1e6495692da..533eaf843e3 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -4324,7 +4324,6 @@ instruct loadRange(iRegINoSp dst, memory mem) + instruct loadP(iRegPNoSp dst, memory mem) + %{ + match(Set dst (LoadP mem)); +- predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# ptr, #@loadP" %} +@@ -5060,8 +5059,6 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS + + instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); +- + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); +@@ -5181,7 +5178,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL + + instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + +@@ -5327,7 +5324,6 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne + + instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP 
oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +@@ -5462,7 +5458,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN + + instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + +@@ -5592,7 +5588,6 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne + + instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); +@@ -5731,7 +5726,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN + + instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + +@@ -5798,7 +5793,6 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) + + instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); +@@ -5865,7 +5859,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) + + instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetP mem newv)); + + +From 4673921af60f4779d4322256f92bb60a850cb035 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:51:09 +0800 +Subject: [PATCH 106/140] Revert JDK-8252990: Intrinsify Unsafe.storeStoreFence + +--- + src/hotspot/cpu/riscv/riscv.ad | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 533eaf843e3..5fa3b85c001 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7537,7 +7537,6 @@ instruct membar_release() %{ + + instruct membar_storestore() %{ + match(MemBarStoreStore); +- match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} + +From e254a03e87ffc6d8f563dbd7db1b607a95657263 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:54:02 +0800 +Subject: [PATCH 107/140] Revert JDK-8255150: Add utility methods to check long + indexes and ranges && JDK-8252372: Check if cloning is required to move loads + out of loops in PhaseIdealLoop::split_if_with_blocks_post() + +--- + src/hotspot/cpu/riscv/riscv.ad | 33 --------------------------------- + 1 file changed, 33 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 5fa3b85c001..388e65f623d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7621,17 +7621,6 @@ instruct castPP(iRegPNoSp dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castLL(iRegL dst) 
+-%{ +- match(Set dst (CastLL dst)); +- +- size(0); +- format %{ "# castLL of $dst, #@castLL" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + instruct castII(iRegI dst) + %{ + match(Set dst (CastII dst)); +@@ -7654,28 +7643,6 @@ instruct checkCastPP(iRegPNoSp dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castFF(fRegF dst) +-%{ +- match(Set dst (CastFF dst)); +- +- size(0); +- format %{ "# castFF of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- +-instruct castDD(fRegD dst) +-%{ +- match(Set dst (CastDD dst)); +- +- size(0); +- format %{ "# castDD of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + // ============================================================================ + // Convert Instructions + + +From 2c1820363992d09ef0cd2ed2553c04e0f7afd91f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:02:14 +0800 +Subject: [PATCH 108/140] Revert reset_label part of JDK-8248411: [aarch64] + Insufficient error handling when CodeBuffer is exhausted + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 14 +++++--------- + 2 files changed, 6 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 656334f326b..37ccf132986 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3784,7 +3784,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) + if (StubRoutines::riscv::complete()) { + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { +- DEBUG_ONLY(reset_labels(around)); ++ DEBUG_ONLY(reset_labels1(around)); + postcond(pc() == badAddress); + return NULL; + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 953bca3cbd8..45ffc663963 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -815,17 +815,13 @@ class MacroAssembler: public Assembler { + private: + + #ifdef ASSERT +- // Template short-hand support to clean-up after a failed call to trampoline ++ // Macro short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). +- template +- void reset_labels(Label& lbl, More&... 
more) { +- lbl.reset(); reset_labels(more...); +- } +- template +- void reset_labels(Label& lbl) { +- lbl.reset(); +- } ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) + #endif + void repne_scan(Register addr, Register value, Register count, Register tmp); + + +From 014972a0778b8c5568fae9e92d286b634cb44674 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:30:49 +0800 +Subject: [PATCH 109/140] Revert JDK-8242289: C2: Support platform-specific + node cloning in Matcher + +--- + src/hotspot/cpu/riscv/riscv.ad | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 388e65f623d..7cd6c2995ba 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1765,20 +1765,10 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + + const bool Matcher::convi2l_type_required = false; + +-// Should the Matcher clone input 'm' of node 'n'? +-bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { +- assert_cond(m != NULL); +- if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) +- mstack.push(m, Visit); // m = ShiftCntV +- return true; +- } +- return false; +-} +- + // Should the Matcher clone shifts on addressing modes, expecting them + // to be subsumed into complex addressing expressions or compute them + // into registers? +-bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); + } + + +From d15e155e9b84f4789cfbb1cf75382be859b0a8ca Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:40:00 +0800 +Subject: [PATCH 110/140] Revert JDK-8255782: Turn UseTLAB and ResizeTLAB from + product_pd to product, defaulting to "true" + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 8f2f4e0e81d..25e00bea901 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -34,6 +34,8 @@ + + #ifndef TIERED + define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); + define_pd_global(bool, InlineIntrinsics, true ); + define_pd_global(bool, PreferInterpreterNativeStubs, false); + define_pd_global(bool, ProfileTraps, false); +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 33d78fb2f6f..3da1f1c6d86 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -33,6 +33,8 @@ + // (see c2_globals.hpp). Alpha-sorted. 
+ + define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); + define_pd_global(bool, CICompileOSR, true); + define_pd_global(bool, InlineIntrinsics, true); + define_pd_global(bool, PreferInterpreterNativeStubs, false); + +From f3fa0cfa987743b4ee83332ddf71add421561908 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:49:57 +0800 +Subject: [PATCH 111/140] Revert JDK-8265245: depChecker_ don't have any + functionalities + +--- + src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 ++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp new file mode 100644 -index 000000000..4d79e3ee4 +index 00000000000..e9ff307b647 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,58 @@ ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59934,435 +80563,1447 @@ index 000000000..4d79e3ee4 + * + */ + -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class RISCV64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); -+ -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } -+ -+ public RISCV64JavaCallWrapper(Address addr) { -+ super(addr); -+ } ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -new file mode 100644 -index 000000000..d7187a5f8 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// Nothing to do on riscv + -+package sun.jvm.hotspot.runtime.riscv64; ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + +From 97a3d4d3b98a450aa316eaa94103cf8473d12d50 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:58:34 +0800 +Subject: [PATCH 112/140] Revert JDK-8241438: Move IntelJccErratum mitigation + code to platform-specific code + +--- + src/hotspot/cpu/riscv/riscv.ad | 18 ------------------ + 1 file changed, 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 7cd6c2995ba..fc6823daf8b 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -740,13 +740,6 @@ class HandlerImpl { + } + }; + +-class Node::PD { +-public: +- enum NodeFlags { +- _last_flag = Node::_last_flag +- }; +-}; +- + bool is_CAS(int opcode, bool maybe_volatile); + + // predicate controlling translation of CompareAndSwapX +@@ -805,17 +798,6 @@ void reg_mask_init() { + } + } + +-void PhaseOutput::pd_perform_mach_node_analysis() { +-} +- +-int MachNode::pd_alignment_required() const { +- return 1; +-} +- +-int MachNode::compute_padding(int current_offset) const { +- return 0; +-} +- + // is_CAS(int opcode, bool maybe_volatile) + // + // return true if opcode is one of the possible CompareAndSwapX + +From 8a3e7b81b79918a4f2feb4d9226ab8be6c43c28a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:03:47 +0800 +Subject: [PATCH 113/140] Revert JDK-8260355: AArch64: deoptimization stub + should save vector registers + +--- + src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 --------------------- + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 1 - + 2 files changed, 46 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +deleted file mode 100644 +index 26c1edc36ff..00000000000 +--- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* +- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). 
+- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "runtime/registerMap.hpp" +-#include "vmreg_riscv.inline.hpp" +- +-address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { +- if (base_reg->is_VectorRegister()) { +- assert(base_reg->is_concrete(), "must pass base reg"); +- int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / +- VectorRegisterImpl::max_slots_per_register; +- intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; +- address base_location = location(base_reg); +- if (base_location != NULL) { +- return base_location + offset_in_bytes; +- } else { +- return NULL; +- } +- } else { +- return location(base_reg->next(slot_idx)); +- } +-} +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +index f34349811a9..fef8ca9b64e 100644 +--- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -33,7 +33,6 @@ + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + address pd_location(VMReg reg) const { return NULL; } +- address pd_location(VMReg base_reg, int slot_idx) const; + + // no PD state to clear or copy: + void pd_clear() {} + +From 5fc20f93a312f9189b55c5236c15a55b3da10cf9 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:05:37 +0800 +Subject: [PATCH 114/140] Revert JDK-8250914: Matcher::stack_direction() is + unused + +--- + src/hotspot/cpu/riscv/riscv.ad | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index fc6823daf8b..c21508b6e7c 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2326,6 +2326,9 @@ encode %{ + // SP meets the minimum alignment. + + frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; + // These three registers define part of the calling convention + // between compiled code and the interpreter. 
+ + +From aab3322fd2507a3aeae39c69ba871400dd342834 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:15:45 +0800 +Subject: [PATCH 115/140] Revert CacheWB*Node matching rules + +--- + src/hotspot/cpu/riscv/riscv.ad | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c21508b6e7c..e410bd06aa6 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1475,14 +1475,6 @@ const bool Matcher::match_rule_supported(int opcode) { + } + + switch (opcode) { +- case Op_CacheWB: // fall through +- case Op_CacheWBPreSync: // fall through +- case Op_CacheWBPostSync: +- if (!VM_Version::supports_data_cache_line_flush()) { +- return false; +- } +- break; +- + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + +From 705981aaff19b442b55df8a038aab9c61133bc3a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:21:10 +0800 +Subject: [PATCH 116/140] Revert JDK-8263595: Remove oop type punning in + JavaCallArguments + +--- + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +index bc4e5758256..df3c0267eea 100644 +--- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -65,8 +65,9 @@ class JNITypes : private AllStatic { + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. +- static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } +- static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + +From bba22725b9f1386d8899941ccee3e8dc7f9a4a6f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:33:01 +0800 +Subject: [PATCH 117/140] Revert JDK-8260012: Reduce inclusion of + collectedHeap.hpp and heapInspection.hpp + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 40ec584b994..d4fcbdcbbde 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -598,7 +598,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } +- assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + +From 49000a43408aba29d3dc9ee4e03219e6f85be602 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:35:21 +0800 +Subject: [PATCH 118/140] Revert JDK-8271869: AArch64: build errors with GCC11 + in frame::saved_oop_result + +--- + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +index 5ac1bf57f57..abd5bda7e49 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -230,8 +230,6 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + + + // Compiled frames +-PRAGMA_DIAG_PUSH +-PRAGMA_NONNULL_IGNORED + inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); +@@ -243,6 +241,5 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; + } +-PRAGMA_DIAG_POP + + #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP + +From 14a46a85e65f6fec09ac566d49a6232216881adb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:40:43 +0800 +Subject: [PATCH 119/140] Revert JDK-8230392: Define AArch64 as + MULTI_COPY_ATOMIC + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index ffd420da024..606f0fa0da3 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -33,10 +33,6 @@ const int StackAlignmentInBytes = 16; + // 32-bit integer argument values are extended to 64 bits. + const bool CCallingConventionRequiresIntsAsLongs = false; + +-// RISCV has adopted a multicopy atomic model closely following +-// that of ARMv8. +-#define CPU_MULTI_COPY_ATOMIC +- + // To be safe, we deoptimize when we come across an access that needs + // patching. This is similar to what is done on aarch64. 
+ #define DEOPTIMIZE_WHEN_PATCHING + +From 8740928267a831c62f1deb20c910e3c27716bc40 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:42:20 +0800 +Subject: [PATCH 120/140] Revert: JDK-8246689: Enable independent compressed + oops/class ptrs on Aarch64 JDK-8241825: Make compressed oops and compressed + class pointers independent (x86_64, PPC, S390) + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index 606f0fa0da3..acdf75d324e 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -41,6 +41,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define SUPPORT_RESERVED_STACK_AREA + +-#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false +- + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +From 94b40f4efccc19c8ac66eda6c57381a222b02d2d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:50:49 +0800 +Subject: [PATCH 121/140] Revert JDK-8222637: Obsolete NeedsDeoptSuspend + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b78f258a764..a838a377829 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -32,6 +32,8 @@ + // Sets the default values for platform dependent flags used by the runtime system. + // (see globals.hpp) + ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + -+public class RISCV64RegisterMap extends RegisterMap { + define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks + define_pd_global(bool, TrapBasedNullChecks, false); + define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +From 09968c9fc102fd32bc628d3e6fd9d9adcbec4373 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:52:44 +0800 +Subject: [PATCH 122/140] Revert JDK-8220051: Remove global safepoint code + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index acdf75d324e..d6ce8da07b8 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -41,4 +41,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define SUPPORT_RESERVED_STACK_AREA + ++#define THREAD_LOCAL_POLL + -+ /** This is the only public constructor */ -+ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +From 2f4fb2b5ac420d456421592dc09b81244636ba4d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:00:52 +0800 +Subject: [PATCH 123/140] Revert JDK-8272873: C2: Inlining should not depend on + absolute call site counts + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index a838a377829..b4f71c45ec1 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -41,6 +41,7 @@ 
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs + define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. + define_pd_global(intx, CodeEntryAlignment, 64); + define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); + + #define DEFAULT_STACK_YELLOW_PAGES (2) + #define DEFAULT_STACK_RED_PAGES (1) + +From 2df3625eea16fc0d45c0e4cf12c9433f0ec070fd Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:02:13 +0800 +Subject: [PATCH 124/140] Revert JDK-8220049: Obsolete ThreadLocalHandshakes + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b4f71c45ec1..b7d85373c4a 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); + // Clear short arrays bigger than one word in an arch-specific way + define_pd_global(intx, InitArrayShortSize, BytesPerLong); + ++define_pd_global(bool, ThreadLocalHandshakes, true); + -+ protected RISCV64RegisterMap(RegisterMap map) { -+ super(map); -+ } + define_pd_global(intx, InlineSmallCode, 1000); + + #define ARCH_FLAGS(develop, \ + +From a875c4caa423dd727cea1c891b17f4ded97e57d1 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:04:32 +0800 +Subject: [PATCH 125/140] Revert: JDK-8243208: Clean up JVMFlag implementation + JDK-8236625: Remove writeable macro from JVM flags declaration + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b7d85373c4a..0becd9efd35 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -82,9 +82,12 @@ define_pd_global(intx, InlineSmallCode, 1000); + + #define ARCH_FLAGS(develop, \ + product, \ ++ diagnostic, \ ++ experimental, \ + notproduct, \ + range, \ +- constraint) \ ++ constraint, \ ++ writeable) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + +From 19a9e6e8c3dba77cf8be0f25b1aec394aeca0b25 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 10 May 2023 09:44:12 +0800 +Subject: [PATCH 126/140] Revert JDK-8213436: Obsolete UseMembar && + JDK-8188764: Obsolete AssumeMP and then remove all support for non-MP builds, + always enabled + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 0becd9efd35..e820898d87f 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -64,6 +64,8 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + define_pd_global(bool, RewriteBytecodes, true); + define_pd_global(bool, RewriteFrequentPairs, true); + ++define_pd_global(bool, UseMembar, true); + -+ public Object clone() { -+ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); -+ return retval; -+ } + define_pd_global(bool, PreserveFramePointer, false); + + // GC Ergo Flags +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 50ee7edb708..f13e4269b77 100644 +--- 
a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -139,6 +139,8 @@ void VM_Version::initialize() { + #endif // COMPILER2 + + UNSUPPORTED_OPTION(CriticalJNINatives); + -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107c..948eabcab 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -54,7 +54,7 @@ public class PlatformInfo { ++ FLAG_SET_DEFAULT(UseMembar, true); + } - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; + #ifdef COMPILER2 + +From 0c4a9d1b6b3b3b31a1c105ff311414ae542764bb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 1 May 2023 16:04:15 +0800 +Subject: [PATCH 127/140] Misc adaptations to jdk11u + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 +- + .../linux_riscv/vm_version_linux_riscv.cpp | 16 ++++++++-------- + 2 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 25e00bea901..9316d4be02e 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -57,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); +-define_pd_global(uint64_t, MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); + #endif // !TIERED + define_pd_global(bool, UseTypeProfile, false); +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +index 4623dbfad42..60260854db6 100644 +--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -83,14 +83,14 @@ void VM_Version::get_os_cpu_info() { - for(String s : KNOWN) { - if(s.equals(cpu)) -diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c -index d0a6f4ea8..a29c7bf8b 100644 ---- a/src/utils/hsdis/hsdis.c -+++ b/src/utils/hsdis/hsdis.c -@@ -28,9 +28,6 @@ - */ - - #include /* required by bfd.h */ --#include --#include --#include + uint64_t auxv = getauxval(AT_HWCAP); - #include - #include -@@ -479,6 +476,9 @@ static const char* native_arch_name() { - #endif - #ifdef LIBARCH_s390x - res = "s390:64-bit"; -+#endif -+#ifdef LIBARCH_riscv64 -+ res = "riscv:rv64"; +- static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); +- static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); +- static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); +- static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); +- static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux 
HWCAP"); +- static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); +- static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); +- static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ STATIC_ASSERT(CPU_B == HWCAP_ISA_B); + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | + +From 4ce5e05526029360ad15eb9639c9c05fac77ac8e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 20 May 2023 17:51:52 +0800 +Subject: [PATCH 128/140] Save all call-clobbered registers for spark tests may + crash + +--- + .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index bc847388f68..e191cbcee2a 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + __ j(done); + + __ bind(runtime); +- // save the live input values +- RegSet saved = RegSet::of(pre_val); +- if (tosca_live) { saved += RegSet::of(x10); } +- if (obj != noreg) { saved += RegSet::of(obj); } +- +- __ push_reg(saved, sp); + ++ __ push_call_clobbered_registers(); + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } +- +- __ pop_reg(saved, sp); ++ __ pop_call_clobbered_registers(); + + __ bind(done); + + +From 1b8778b0831571e9ac688bbd22afca4cf8f62407 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 22 Aug 2023 16:17:31 +0800 +Subject: [PATCH 129/140] Build with gcc 13 + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 + + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 37ccf132986..fd18bb77058 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -49,6 +49,7 @@ + #include "runtime/thread.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" + #include "opto/node.hpp" + #include "opto/output.hpp" #endif - if (res == NULL) - res = "architecture not set in Makefile!"; -diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c2..a21307083 100644 ---- a/test/hotspot/jtreg/compiler/c2/TestBit.java -+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools; - * - * @run driver compiler.c2.TestBit - * -- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" -+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" - * @requires vm.debug == true & vm.compiler2.enabled +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index 31d9254d8ad..ccceed643ed 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -24,6 +24,7 @@ */ - public class TestBit { -@@ -54,7 +55,8 @@ public class TestBit { - String expectedTestBitInstruction = - "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : - "aarch64".equals(System.getProperty("os.arch")) ? "tb" : -- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; -+ "amd64".equals(System.getProperty("os.arch")) ? "test" : -+ "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null; - if (expectedTestBitInstruction != null) { - output.shouldContain(expectedTestBitInstruction); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f..9d875e33f 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + #include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/thread.inline.hpp" - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a..a7e277060 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ + +From 4c23be6665aec94462e82e3b4adcf7abb5b23981 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 5 Sep 2023 15:37:43 +0800 +Subject: [PATCH 130/140] Fix copyright information + +--- + make/autoconf/build-aux/config.guess | 2 +- + .../MyPackage/HeapMonitorEventsForTwoThreadsTest.java | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess +index 15111d827ab..a88a9adec3f 100644 +--- a/make/autoconf/build-aux/config.guess ++++ b/make/autoconf/build-aux/config.guess +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. + # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # +diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +index f0b7aed5ceb..54640b245f8 100644 +--- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java ++++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +@@ -1,4 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + +From 70a060f73c3617e58f881bcee19f1a3ce43f54ff Mon Sep 17 00:00:00 2001 +From: Chris Plummer +Date: Thu, 2 Jul 2020 13:13:10 -0700 +Subject: [PATCH 131/140] 8247533: SA stack walking sometimes fails with + sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp + +Reviewed-by: sspitsyn, ysuenaga, dtitov +--- + .../native/libsaproc/LinuxDebuggerLocal.c | 8 ++++++- + .../linux/native/libsaproc/ps_proc.c | 3 ++- + .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- + .../debugger/bsd/BsdDebuggerLocal.java | 2 +- + .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- + .../debugger/linux/LinuxDebuggerLocal.java | 2 +- + .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- + .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- + .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- + 9 files changed, 61 insertions(+), 27 deletions(-) + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 45a927fb5ee..6f1887f8113 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -413,7 +413,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309d..e714fcc59 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (get_lwp_regs(ph, lwp_id, &gregs) != true) { +- THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); ++ // This is not considered fatal and does happen on occassion, usually with an ++ // ESRCH error. The root cause is not fully understood, but by ignoring this error ++ // and returning NULL, stacking walking code will get null registers and fallback ++ // to using the "last java frame" if setup. 
++ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); ++ fflush(stdout); ++ return NULL; + } + + #undef NPRGREG +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index de5254d859e..691c3f6684a 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -144,7 +144,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); ++ print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, ++ errno, strerror(errno)); + return false; + } + return true; +diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +index 18b8b4282fe..e46370a1f18 100644 +--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m ++++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + JNIEnv *env, jobject this_obj, + jlong thread_id) + { +- print_debug("getThreadRegisterSet0 called\n"); ++ print_debug("getThreadIntegerRegisterSet0 called\n"); + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (ph != NULL && ph->core != NULL) { +@@ -705,7 +705,13 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + + if (result != KERN_SUCCESS) { +- print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); ++ // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a ++ // failure to get thread registers, but if it were to fail the response should ++ // be the same. By ignoring this error and returning NULL, stacking walking code ++ // will get null registers and fallback to using the "last java frame" if setup. 
++ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", ++ result, tid); ++ fflush(stdout); + return NULL; + } + +@@ -808,25 +814,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + */ + JNIEXPORT jint JNICALL + Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( +- JNIEnv *env, jobject this_obj, jint tid) ++ JNIEnv *env, jobject this_obj, jint tid) + { + print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + + kern_return_t result; + thread_t foreign_tid, usable_tid; + mach_msg_type_name_t type; +- ++ + foreign_tid = tid; +- ++ + task_t gTask = getTask(env, this_obj); +- result = mach_port_extract_right(gTask, foreign_tid, +- MACH_MSG_TYPE_COPY_SEND, ++ result = mach_port_extract_right(gTask, foreign_tid, ++ MACH_MSG_TYPE_COPY_SEND, + &usable_tid, &type); + if (result != KERN_SUCCESS) + return -1; +- ++ + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- ++ + return (jint) usable_tid; + } + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +index 655b450c3fc..d0557a7d254 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException); ++ throw new DebuggerException(lastException.getMessage(), lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +index 0d637f30f14..c52d3a51d54 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366b..d52d81e26 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -67,8 +67,12 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); + ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +index cb6712b58ee..6a0648f508a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException); ++ throw new DebuggerException(lastException.getMessage(), lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +index 52307b9cdcf..3fe795d34bc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli; - - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA_OPTION), - new UseSHASpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae..50e549069 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,6 @@ +@@ -73,8 +73,12 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); + ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +index ec5aea35e8c..377650a0a1c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; +@@ -30,9 +30,9 @@ - /** - * Generic test case for SHA-related options targeted to any CPU except -- * AArch64, PPC, S390x, SPARC and X86. -+ * AArch64, RISCV64, PPC, S390x, SPARC and X86. 
- */ - public class GenericTestCaseForOtherCPU extends - SHAOptionsBase.TestCase { - public GenericTestCaseForOtherCPU(String optionName) { -- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. -+ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. - super(optionName, new NotPredicate( - new OrPredicate(Platform::isAArch64, -+ new OrPredicate(Platform::isRISCV64, - new OrPredicate(Platform::isS390x, - new OrPredicate(Platform::isSparc, - new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, -- Platform::isX86))))))); -+ Platform::isX86)))))))); + class WindbgAMD64Thread implements ThreadProxy { + private WindbgDebugger debugger; +- private long sysId; ++ private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id + private boolean gotID; +- private long id; ++ private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId + + // The address argument must be the address of the OSThread::_thread_id + WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { +@@ -50,8 +50,12 @@ class WindbgAMD64Thread implements ThreadProxy { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +@@ -86,6 +90,7 @@ public String toString() { + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); ++ gotID = true; } - @Override - protected void verifyWarnings() throws Throwable { - String shouldPassMessage = String.format("JVM should start with " - + "option '%s' without any warnings", optionName); -- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-RISCV64 usage of - // SHA-related options will not cause any warnings. - CommandLineOptionTest.verifySameJVMStartup(null, - new String[] { ".*" + optionName + ".*" }, shouldPassMessage, -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -new file mode 100644 -index 000000000..d81b5b53f ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,102 @@ -+/* -+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ + return id; +diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +index 314cf69c957..e3b218b4dae 100644 +--- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp ++++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +@@ -45,6 +45,7 @@ + + #include + #include ++#include + + #define DEBUG_NO_IMPLEMENTATION + #include +@@ -765,9 +766,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal + CHECK_EXCEPTION_(0); + + ULONG id = 0; +- COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), +- "Windbg Error: GetThreadIdBySystemId failed!", 0); +- ++ HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); ++ if (hr != S_OK) { ++ // This is not considered fatal and does happen on occassion, usually with an ++ // 0x80004002 "No such interface supported". The root cause is not fully understood, ++ // but by ignoring this error and returning NULL, stacking walking code will get ++ // null registers and fallback to using the "last java frame" if setup. ++ printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", ++ hr, sysId); ++ return -1; ++ } + return (jlong) id; + } + + +From 2cadd133d25e05be6ab9b16024a37bed79af1f15 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Wed, 30 Mar 2022 09:04:55 +0000 +Subject: [PATCH 132/140] 8283737: riscv: MacroAssembler::stop() should emit + fixed-length instruction sequence + +Reviewed-by: fyang, shade +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index fd18bb77058..b72a553da2f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -542,8 +542,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t + void MacroAssembler::stop(const char* msg) { + address ip = pc(); + pusha(); +- li(c_rarg0, (uintptr_t)(address)msg); +- li(c_rarg1, (uintptr_t)(address)ip); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. 
++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); + +From 729e0db14cb320aedf1f12051e667513bddbb8e8 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Sun, 24 Apr 2022 02:17:03 +0000 +Subject: [PATCH 133/140] 8285437: riscv: Fix MachNode size mismatch for + MacroAssembler::verify_oops* + +Reviewed-by: shade, fyang +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index b72a553da2f..9f80f7e2650 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -389,7 +389,10 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 +- li(t0, (uintptr_t)(address)b); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; +@@ -425,7 +428,10 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + ld(x10, addr); + } + +- li(t0, (uintptr_t)(address)b); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + +From 5cab06c6f09f4b62d54d8d291b1a23f796a085c1 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Mon, 30 May 2022 07:45:50 +0000 +Subject: [PATCH 134/140] 8287418: riscv: Fix correctness issue of + MacroAssembler::movptr + +Reviewed-by: fjiang, yadongwang, fyang +--- + src/hotspot/cpu/riscv/assembler_riscv.cpp | 14 +++++++------- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 18 +++++++++--------- + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 ++- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 2 +- + 4 files changed, 19 insertions(+), 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp +index f15ef5304c5..a5f688cda1f 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp +@@ -282,9 +282,9 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + } + #endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), +- "48-bit overflow in address constant"); +- // Load upper 32 bits +- int32_t imm = imm64 >> 16; ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int32_t imm = imm64 >> 17; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; +@@ -292,13 +292,13 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + lui(Rd, upper); + addi(Rd, Rd, lower); + +- // Load the rest 16 bits. ++ // Load the rest 17 bits. + slli(Rd, Rd, 11); +- addi(Rd, Rd, (imm64 >> 5) & 0x7ff); +- slli(Rd, Rd, 5); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); + + // This offset will be used by following jalr/ld. 
+- offset = imm64 & 0x1f; ++ offset = imm64 & 0x3f; + } + + void Assembler::movptr(Register Rd, uintptr_t imm64) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 9f80f7e2650..f592d7585da 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1158,12 +1158,12 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { + + static int patch_addr_in_movptr(address branch, address target) { + const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load +- int32_t lower = ((intptr_t)target << 36) >> 36; +- int64_t upper = ((intptr_t)target - lower) >> 28; +- Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] +- Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] +- Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] +- Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] + return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + } + +@@ -1235,9 +1235,9 @@ static long get_offset_of_pc_relative(address insn_addr) { + + static address get_target_of_movptr(address insn_addr) { + assert_cond(insn_addr != NULL); +- intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. +- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. +- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + return (address) target_address; + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 45ffc663963..792c1fc2103 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -827,7 +827,8 @@ class MacroAssembler: public Assembler { + + // Return true if an address is within the 48-bit RISCV64 address space. 
+ bool is_valid_riscv64_address(address addr) { +- return ((uintptr_t)addr >> 48) == 0; ++ // sv48: must have bits 63–48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index bfe84fa4e30..27011ad1283 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -89,7 +89,7 @@ bool NativeInstruction::is_movptr_at(address instr) { + is_addi_at(instr + instruction_size) && // Addi + is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 + is_addi_at(instr + instruction_size * 3) && // Addi +- is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 + (is_addi_at(instr + instruction_size * 5) || + is_jalr_at(instr + instruction_size * 5) || + is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load + +From 41d73298bf28473b3ba2483e61a39c188eddfde3 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Fri, 22 Sep 2023 16:57:56 +0800 +Subject: [PATCH 135/140] Fix: Fixed-length mv() mistakenly redirected to li() + during reshaping + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 ++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 +-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index f592d7585da..f851cc1e413 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1343,6 +1343,12 @@ void MacroAssembler::mv(Register Rd, Address dest) { + movptr(Rd, dest.target()); + } + ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} ++ + void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 792c1fc2103..65f91532661 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -540,8 +540,6 @@ class MacroAssembler: public Assembler { + } + + // mv +- void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } +- + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } +@@ -552,6 +550,7 @@ class MacroAssembler: public Assembler { + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); + void mv(Register Rd, RegisterOrConstant src); + + // logic + +From 26f4b26a98507ec03a2329bfcbaab393247fe83f Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Fri, 2 Sep 2022 07:01:02 +0000 +Subject: [PATCH 136/140] 8293100: RISC-V: Need to save and restore + callee-saved FloatRegisters in StubGenerator::generate_call_stub + +Reviewed-by: yadongwang, fjiang, shade, vkempik +--- + src/hotspot/cpu/riscv/frame_riscv.hpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 18 ++--- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 74 +++++++++++++++++-- + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 2 +- + 4 files 
changed, 80 insertions(+), 16 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +index 3b88f6d5a1a..18e021dcb94 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -131,7 +131,7 @@ + // Entry frames + // n.b. these values are determined by the layout defined in + // stubGenerator for the Java call stub +- entry_frame_after_call_words = 22, ++ entry_frame_after_call_words = 34, + entry_frame_call_wrapper_offset = -10, + + // we don't need a save area +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index e410bd06aa6..69696b272a5 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -8601,7 +8601,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); +@@ -8618,7 +8618,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); +@@ -8636,7 +8636,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -8654,7 +8654,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -8929,7 +8929,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ + effect(USE lbl); + + ins_cost(BRANCH_COST); +- format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); +@@ -9138,7 +9138,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), +@@ -9154,7 +9154,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), +@@ -9171,7 +9171,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, 
label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -9187,7 +9187,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 74c38c3d044..9970229c5c5 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -118,16 +118,28 @@ class StubGenerator: public StubCodeGenerator { + // we don't need to save x6-x7 and x28-x31 which both C and Java treat as + // volatile + // +- // we save x18-x27 which Java uses as temporary registers and C +- // expects to be callee-save ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save + // + // so the stub frame looks like this when we enter Java code + // + // [ return_from_Java ] <--- sp + // [ argument word n ] + // ... +- // -22 [ argument word 1 ] +- // -21 [ saved x27 ] <--- sp_after_call ++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] +@@ -152,7 +164,20 @@ class StubGenerator: public StubCodeGenerator { + + // Call stub stack layout word offsets from fp + enum call_stub_layout { +- sp_after_call_off = -21, ++ sp_after_call_off = -33, ++ ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, + + x27_off = -21, + x26_off = -20, +@@ -198,6 +223,19 @@ class StubGenerator: public StubCodeGenerator { + + const Address thread (fp, thread_off * wordSize); + ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); + -+package compiler.intrinsics.sha.cli.testcases; + const Address x27_save (fp, x27_off * wordSize); + const Address x26_save (fp, x26_off * wordSize); + const Address x25_save (fp, x25_off * wordSize); +@@ -244,6 +282,19 @@ class StubGenerator: public StubCodeGenerator { + __ sd(x26, x26_save); + __ sd(x27, x27_save); + ++ __ fsd(f8, 
f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); + -+import compiler.intrinsics.sha.cli.SHAOptionsBase; -+import jdk.test.lib.process.ExitCode; -+import jdk.test.lib.Platform; -+import jdk.test.lib.cli.CommandLineOptionTest; -+import jdk.test.lib.cli.predicate.AndPredicate; -+import jdk.test.lib.cli.predicate.NotPredicate; + // install Java thread in global register now we have saved + // whatever value it held + __ mv(xthread, c_rarg7); +@@ -335,6 +386,19 @@ class StubGenerator: public StubCodeGenerator { + #endif + + // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); + -+/** -+ * Generic test case for SHA-related options targeted to RISCV64 CPUs -+ * which don't support instruction required by the tested option. -+ */ -+public class GenericTestCaseForUnsupportedRISCV64CPU extends -+ SHAOptionsBase.TestCase { -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { -+ super(optionName, new AndPredicate(Platform::isRISCV64, -+ new NotPredicate(SHAOptionsBase.getPredicateForOption( -+ optionName)))); -+ } + __ ld(x27, x27_save); + __ ld(x26, x26_save); + __ ld(x25, x25_save); +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index 5d1187c2a27..c4338715f95 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -40,7 +40,7 @@ void VMRegImpl::set_regName() { + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { +- regName[i++] = reg->name(); ++ regName[i++] = freg->name(); + } + freg = freg->successor(); + } + +From 69ea557c320ad7b2f35fc0e986af9b485f95addf Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Fri, 28 Oct 2022 11:56:21 +0000 +Subject: [PATCH 137/140] 8295926: RISC-V: C1: Fix + LIRGenerator::do_LibmIntrinsic + +Reviewed-by: yadongwang, fyang +--- + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 21 +++-- + .../floatingpoint/TestLibmIntrinsics.java | 80 +++++++++++++++++++ + 2 files changed, 96 insertions(+), 5 deletions(-) + create mode 100644 test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index f9242251491..c41819fc2ae 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -679,19 +679,30 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + -+ @Override -+ protected void verifyWarnings() throws Throwable { -+ String shouldPassMessage = String.format("JVM startup should pass with" -+ + "option '-XX:-%s' without any warnings", optionName); -+ //Verify that option could be disabled without any warnings. 
-+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) -+ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ SHAOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ } -+ } + CallingConvention* cc = NULL; +- BasicTypeList signature(1); +- signature.append(T_DOUBLE); +- if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } +- cc = frame_map()->c_calling_convention(&signature); +- value.load_item_force(cc->at(0)); + -+ @Override -+ protected void verifyOptionValues() throws Throwable { -+ // Verify that option is disabled by default. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be disabled by default", -+ optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); + -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + value1.set_destroys_register(); + -+ // Verify that option is disabled when +UseSHA was passed to JVM. 
-+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)); -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java new file mode 100644 -index 000000000..d3aafec8e +index 00000000000..5c711efddea --- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -@@ -0,0 +1,153 @@ ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ +/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -60386,701 +82027,420 @@ index 000000000..d3aafec8e + +/* + * @test -+ * @bug 8173585 -+ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that -+ * differing code paths are taken contingent upon the length of the input String. -+ * Hence we must test against differing string lengths in order to validate -+ * correct functionality. We also ensure the strings are long enough to trigger -+ * the looping conditions of the individual code paths. 
-+ * -+ * Run with varing levels of AVX and SSE support, also without the intrinsic at all -+ * -+ * @library /compiler/patches /test/lib -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @summary Test libm intrinsics ++ * @library /test/lib / ++ * ++ * @build jdk.test.whitebox.WhiteBox ++ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics + */ + -+package compiler.intrinsics.string; -+ -+import jdk.test.lib.Asserts; -+ -+public class TestStringLatin1IndexOfChar{ -+ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions -+ -+ public static void main(String[] args) throws Exception { -+ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code... -+ findOneItem(); -+ withOffsetTest(); -+ testEmpty(); -+ } -+ } ++package compiler.floatingpoint; + -+ private static void testEmpty(){ -+ Asserts.assertEQ("".indexOf('a'), -1); -+ } ++import compiler.whitebox.CompilerWhiteBoxTest; ++import jdk.test.whitebox.WhiteBox; + -+ private final static char SEARCH_CHAR = 'z'; -+ private final static char INVERLEAVING_CHAR = 'a'; -+ private final static char MISSING_CHAR = 'd'; ++import java.lang.reflect.Method; + -+ private static void findOneItem(){ -+ //test strings of varying length ensuring that for all lengths one instance of the -+ //search char can be found. 
We check what happens when the search character is in -+ //each position of the search string (including first and last positions) -+ for(int strLength : new int[]{1, 15, 31, 32, 79}){ -+ for(int searchPos = 0; searchPos < strLength; searchPos++){ -+ String totest = makeOneItemStringLatin1(strLength, searchPos); ++public class TestLibmIntrinsics { + -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ Asserts.assertEQ(intri, nonintri); -+ } -+ } -+ } ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + -+ private static String makeOneItemStringLatin1(int length, int searchPos){ -+ StringBuilder sb = new StringBuilder(length); ++ private static final double pi = 3.1415926; + -+ for(int n =0; n < length; n++){ -+ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR); -+ } ++ private static final double expected = 2.5355263553695413; + -+ return sb.toString(); ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); + } + -+ private static void withOffsetTest(){ -+ //progressivly move through string checking indexes and starting offset correctly processed -+ //string is of form azaza, aazaazaa, aaazaaazaaa, etc -+ //we find n s.t. maxlength = (n*3) + 2 -+ int maxaInstances = (MAX_LENGTH-2)/3; -+ -+ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){ -+ String totest = makeWithOffsetStringLatin1(aInstances); -+ -+ int startoffset; -+ { -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ { -+ int intri = totest.indexOf(SEARCH_CHAR, startoffset); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); + -+ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string -+ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1); -+ } -+ } ++ double interpreter_result = m(); + -+ private static String makeWithOffsetStringLatin1(int aInstances){ -+ StringBuilder sb = new StringBuilder((aInstances*3) + 2); -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); + -+ sb.append(SEARCH_CHAR); ++ double c1_result = m(); + -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } ++ WHITE_BOX.deoptimizeMethod(test_method); + -+ sb.append(SEARCH_CHAR); ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); + -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ return sb.toString(); -+ } ++ double c2_result = m(); + -+ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) { -+ //non intrinsic version of indexOfChar -+ byte c = (byte)ch; -+ for (int i = fromIndex; i < value.length(); i++) { -+ if (value.charAt(i) == c) { -+ return i; -+ } ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + 
c2_result); ++ throw new RuntimeException("Test Failed"); + } -+ return -1; + } +} -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a..8093d6598 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e43..1ff9f36e1 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf374..f3531ea74 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f..589209447 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions - * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java 
-index 992fa4b51..907e21371 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528..c41c0b606 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224..b626da40d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8135028 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c9880..92cd84a2f 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95ae..e72345799 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | 
os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb..f4f67cf52 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + +From ec57f23aa4001315a030cacd55aa5ef7c3269fbb Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Mon, 9 Oct 2023 11:07:34 +0800 +Subject: [PATCH 138/140] Fix test error after port 8295926 + +--- + .../jtreg/compiler/floatingpoint/TestLibmIntrinsics.java | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +index 5c711efddea..5a1b659bbe0 100644 +--- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -27,8 +27,8 @@ + * @summary Test libm intrinsics + * @library /test/lib / * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d..c5e38ba72 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -@@ -24,7 +24,7 @@ +- * @build jdk.test.whitebox.WhiteBox +- * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement + * compiler.floatingpoint.TestLibmIntrinsics +@@ -37,7 +37,7 @@ + package compiler.floatingpoint; - /* @test - * @bug 8167409 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs - */ - package compiler.runtime.criticalnatives.argumentcorruption; -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f931..4437367b6 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -@@ -24,7 +24,7 @@ + import compiler.whitebox.CompilerWhiteBoxTest; +-import jdk.test.whitebox.WhiteBox; ++import sun.hotspot.WhiteBox; - /* @test - * @bug 8167408 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp - */ - package compiler.runtime.criticalnatives.lookup; -diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb..284b51019 100644 ---- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -@@ -61,15 +61,17 @@ public class IntrinsicPredicates { + import java.lang.reflect.Method; - public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), - // x86 variants - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), -- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); -+ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + +From b115ec4381ad3ad8cbe9ca3d225cb438538916ac Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 17 Oct 2023 14:22:49 +0800 +Subject: [PATCH 139/140] Revert JDK-8247533: SA stack walking sometimes fails + with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a + lwp + +--- + .../native/libsaproc/LinuxDebuggerLocal.c | 8 +------ + .../linux/native/libsaproc/ps_proc.c | 3 +-- + .../native/libsaproc/MacosxDebuggerLocal.m | 24 +++++++------------ + .../debugger/bsd/BsdDebuggerLocal.java | 2 +- + .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++----- + .../debugger/linux/LinuxDebuggerLocal.java | 2 +- + .../hotspot/debugger/linux/LinuxThread.java | 10 +++----- + .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++-------- + .../windows/native/libsaproc/sawindbg.cpp | 14 +++-------- + 9 files changed, 27 insertions(+), 61 deletions(-) + +diff --git 
a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 6f1887f8113..45a927fb5ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -413,13 +413,7 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -79,10 +81,11 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (get_lwp_regs(ph, lwp_id, &gregs) != true) { +- // This is not considered fatal and does happen on occassion, usually with an +- // ESRCH error. The root cause is not fully understood, but by ignoring this error +- // and returning NULL, stacking walking code will get null registers and fallback +- // to using the "last java frame" if setup. 
+- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); +- fflush(stdout); +- return NULL; ++ THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); + } - public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -92,7 +95,7 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + #undef NPRGREG +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index 691c3f6684a..de5254d859e 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -144,8 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE - = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, -diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a..16c199e37 100644 ---- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace { - // It's ok for ARM not to have symbols, because it does not support NMT detail - // when targeting thumb2. It's also ok for Windows not to have symbols, because - // they are only available if the symbols file is included with the build. -- if (Platform.isWindows() || Platform.isARM()) { -+ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { - return; // we are done - } - output.reportDiagnosticSummary(); -diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abc..46be4dc98 100644 ---- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,6 @@ + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, +- errno, strerror(errno)); ++ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); + return false; + } + return true; +diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +index e46370a1f18..18b8b4282fe 100644 +--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m ++++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + JNIEnv *env, jobject this_obj, + jlong thread_id) + { +- print_debug("getThreadIntegerRegisterSet0 called\n"); ++ print_debug("getThreadRegisterSet0 called\n"); + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (ph != NULL && ph->core != NULL) { +@@ -705,13 +705,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + + if (result != KERN_SUCCESS) { +- // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a +- // failure to get thread registers, but if it were to fail the response should +- // be the same. By ignoring this error and returning NULL, stacking walking code +- // will get null registers and fallback to using the "last java frame" if setup. +- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", +- result, tid); +- fflush(stdout); ++ print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); + return NULL; + } + +@@ -814,25 +808,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + */ + JNIEXPORT jint JNICALL + Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( +- JNIEnv *env, jobject this_obj, jint tid) ++ JNIEnv *env, jobject this_obj, jint tid) + { + print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + + kern_return_t result; + thread_t foreign_tid, usable_tid; + mach_msg_type_name_t type; +- ++ + foreign_tid = tid; +- ++ + task_t gTask = getTask(env, this_obj); +- result = mach_port_extract_right(gTask, foreign_tid, +- MACH_MSG_TYPE_COPY_SEND, ++ result = mach_port_extract_right(gTask, foreign_tid, ++ MACH_MSG_TYPE_COPY_SEND, + &usable_tid, &type); + if (result != KERN_SUCCESS) + return -1; +- ++ + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- ++ + return (jint) usable_tid; + } + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +index d0557a7d254..655b450c3fc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException.getMessage(), lastException); ++ throw new DebuggerException(lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +index c52d3a51d54..0d637f30f14 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java ++++ 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +240,7 @@ public class ReservedStackTest { - return Platform.isAix() || - (Platform.isLinux() && - (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || -- Platform.isX86())) || -+ Platform.isX86() || Platform.isRISCV64())) || - Platform.isOSX() || - Platform.isSolaris(); +@@ -67,12 +67,8 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); + ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); + } + return context; } -diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh -index 0c300d4fd..7f3698c47 100644 ---- a/test/hotspot/jtreg/test_env.sh -+++ b/test/hotspot/jtreg/test_env.sh -@@ -185,6 +185,11 @@ if [ $? = 0 ] - then - VM_CPU="arm" - fi -+grep "riscv64" vm_version.out > ${NULL} -+if [ $? = 0 ] -+then -+ VM_CPU="riscv64" -+fi - grep "ppc" vm_version.out > ${NULL} - if [ $? = 0 ] - then -diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b..73e92855d 100644 ---- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -45,7 +46,7 @@ import java.util.Set; - */ - public class TestMutuallyExclusivePlatformPredicates { - private static enum MethodGroup { -- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), - BITNESS("is32bit", "is64bit"), - OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), - VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -index cb3348a0f..bc0d1a743 100644 ---- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -+++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -@@ -63,13 +63,13 @@ public class thrinfo001 { - try { - t_a.join(); - } catch (InterruptedException e) {} -+ checkInfo(t_a, t_a.getThreadGroup(), 1); - - thrinfo001b t_b = new thrinfo001b(); - t_b.setPriority(Thread.MIN_PRIORITY); - t_b.setDaemon(true); - checkInfo(t_b, t_b.getThreadGroup(), 2); - t_b.start(); -- checkInfo(t_b, t_b.getThreadGroup(), 2); - try { - t_b.join(); - } catch (InterruptedException e) {} -diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1..bb8c79cdd 100644 ---- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,6 @@ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +index 6a0648f508a..cb6712b58ee 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException.getMessage(), lastException); ++ throw new DebuggerException(lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +index 3fe795d34bc..52307b9cdcf 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +55,8 @@ public class TestCPUInformation { - Events.assertField(event, "hwThreads").atLeast(1); - Events.assertField(event, "cores").atLeast(1); - Events.assertField(event, "sockets").atLeast(1); -- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); +@@ -73,12 +73,8 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); + ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); } + return context; } - } -diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c..a9cd63db9 100644 ---- a/test/lib/jdk/test/lib/Platform.java -+++ b/test/lib/jdk/test/lib/Platform.java -@@ -1,5 +1,6 @@ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +index 377650a0a1c..ec5aea35e8c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -202,6 +203,10 @@ public class Platform { - return isArch("arm.*"); - } +@@ -30,9 +30,9 @@ -+ public static boolean isRISCV64() { -+ return isArch("riscv64"); -+ } -+ - public static boolean isPPC() { - return isArch("ppc.*"); + class WindbgAMD64Thread implements ThreadProxy { + private WindbgDebugger debugger; +- private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id ++ private long sysId; + private boolean gotID; +- private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId ++ private long id; + + // The address argument must be the address of the OSThread::_thread_id + WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { +@@ -50,12 +50,8 @@ class WindbgAMD64Thread implements ThreadProxy { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); } -diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -new file mode 100644 -index 000000000..6852c0540 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -@@ -0,0 +1,221 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+package org.openjdk.bench.java.lang; -+ -+import java.util.Random; -+import org.openjdk.jmh.annotations.Benchmark; -+import org.openjdk.jmh.annotations.BenchmarkMode; -+import org.openjdk.jmh.annotations.OutputTimeUnit; -+import org.openjdk.jmh.annotations.Mode; -+import org.openjdk.jmh.annotations.Scope; -+import org.openjdk.jmh.annotations.State; -+ -+import java.util.concurrent.TimeUnit; -+ -+/** -+ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of -+ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified. 
-+ * On x86 the behaviour of the indexOf method is contingent upon the length of the string -+ */ -+@BenchmarkMode(Mode.AverageTime) -+@OutputTimeUnit(TimeUnit.NANOSECONDS) -+@State(Scope.Thread) -+public class IndexOfBenchmark { -+ private static final int loops = 100000; -+ private static final Random rng = new Random(1999); -+ private static final int pathCnt = 1000; -+ private static final String [] latn1_short = new String[pathCnt]; -+ private static final String [] latn1_sse4 = new String[pathCnt]; -+ private static final String [] latn1_avx2 = new String[pathCnt]; -+ private static final String [] latn1_mixedLength = new String[pathCnt]; -+ private static final String [] utf16_short = new String[pathCnt]; -+ private static final String [] utf16_sse4 = new String[pathCnt]; -+ private static final String [] utf16_avx2 = new String[pathCnt]; -+ private static final String [] utf16_mixedLength = new String[pathCnt]; -+ static { -+ for (int i = 0; i < pathCnt; i++) { -+ latn1_short[i] = makeRndString(false, 15); -+ latn1_sse4[i] = makeRndString(false, 16); -+ latn1_avx2[i] = makeRndString(false, 32); -+ utf16_short[i] = makeRndString(true, 7); -+ utf16_sse4[i] = makeRndString(true, 8); -+ utf16_avx2[i] = makeRndString(true, 16); -+ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); -+ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); -+ } -+ } -+ -+ private static String makeRndString(boolean isUtf16, int length) { -+ StringBuilder sb = new StringBuilder(length); -+ if(length > 0){ -+ sb.append(isUtf16?'☺':'b'); -+ -+ for (int i = 1; i < length-1; i++) { -+ sb.append((char)('b' + rng.nextInt(26))); -+ } -+ -+ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string -+ } -+ return sb.toString(); -+ } -+ -+ -+ @Benchmark -+ public static void latin1_mixed_char() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_char() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_mixed_String() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_String() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ ////////// more detailed code path dependent tests ////////// -+ -+ @Benchmark -+ public static void latin1_Short_char() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_SSE4_char() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_AVX2_char() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static int utf16_Short_char() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_char() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_char() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_Short_String() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += 
what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_SSE4_String() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_AVX2_String() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_Short_String() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_String() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_String() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+} --- -2.40.0.windows.1 + return context; + } +@@ -90,7 +86,6 @@ public String toString() { + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); +- gotID = true; + } + + return id; +diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +index e3b218b4dae..314cf69c957 100644 +--- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp ++++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +@@ -45,7 +45,6 @@ + + #include + #include +-#include + + #define DEBUG_NO_IMPLEMENTATION + #include +@@ -766,16 +765,9 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal + CHECK_EXCEPTION_(0); + + ULONG id = 0; +- HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); +- if (hr != S_OK) { +- // This is not considered fatal and does happen on occassion, usually with an +- // 0x80004002 "No such interface supported". The root cause is not fully understood, +- // but by ignoring this error and returning NULL, stacking walking code will get +- // null registers and fallback to using the "last java frame" if setup. +- printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", +- hr, sysId); +- return -1; +- } ++ COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), ++ "Windbg Error: GetThreadIdBySystemId failed!", 0); ++ + return (jlong) id; + } + +From 4b01e13731fc330ca3d57a5cd532c91bc66579c8 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Wed, 31 Jan 2024 17:26:31 +0800 +Subject: [PATCH 140/140] Remove unused zSyscall_linux_riscv.hpp + +--- + .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 ------------------- + 1 file changed, 42 deletions(-) + delete mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp + +diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +deleted file mode 100644 +index 1aa58f27871..00000000000 +--- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp ++++ /dev/null +@@ -1,42 +0,0 @@ +-/* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. 
+- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +-#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +- +-#include +- +-// +-// Support for building on older Linux systems +-// +- +-#ifndef SYS_memfd_create +-#define SYS_memfd_create 279 +-#endif +-#ifndef SYS_fallocate +-#define SYS_fallocate 47 +-#endif +- +-#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..029b5085ced97aa3f49b7f4c9f71956193358025 --- /dev/null +++ b/LoongArch64-support.patch @@ -0,0 +1,116372 @@ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 +--- a/make/autoconf/hotspot.m4 2024-01-10 05:19:49.000000000 +0800 ++++ b/make/autoconf/hotspot.m4 2024-01-30 10:00:11.621434355 +0800 +@@ -34,6 +34,12 @@ + # All valid JVM variants + VALID_JVM_VARIANTS="server client minimal core zero custom" + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + ############################################################################### + # Check if the specified JVM variant should be built. To be used in shell if + # constructs, like this: +@@ -340,6 +346,26 @@ + HOTSPOT_TARGET_CPU_ARCH=arm + fi + ++ # Override hotspot cpu definitions for MIPS and LOONGARCH platforms ++ if test "x$OPENJDK_TARGET_CPU" = xmips64el && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=mips_64 ++ HOTSPOT_TARGET_CPU_ARCH=mips ++ elif test "x$OPENJDK_TARGET_CPU" = xloongarch64 && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=loongarch_64 ++ HOTSPOT_TARGET_CPU_ARCH=loongarch ++ fi ++ ++ # Disable compiler1 on linux-mips and linux-loongarch ++ if ! (HOTSPOT_CHECK_JVM_FEATURE(compiler1)); then ++ AC_MSG_CHECKING([if compiler1 should be built, $JVM_FEATURES]) ++ if test "x$OPENJDK_TARGET_OS" = "xlinux" && test "x$HOTSPOT_TARGET_CPU_ARCH" = "xmips"; then ++ DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES compiler1" ++ AC_MSG_RESULT([no, platform not supported]) ++ else ++ AC_MSG_RESULT([yes]) ++ fi ++ fi ++ + # Verify that dependencies are met for explicitly set features. + if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! 
HOTSPOT_CHECK_JVM_FEATURE(services); then + AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) +@@ -424,10 +450,11 @@ + JVM_FEATURES_jvmci="" + INCLUDE_JVMCI="false" + else +- # Only enable jvmci on x86_64, sparcv9 and aarch64 ++ # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" + INCLUDE_JVMCI="true" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 +--- a/make/autoconf/platform.m4 2024-01-10 05:19:49.000000000 +0800 ++++ b/make/autoconf/platform.m4 2024-01-30 10:00:11.621434355 +0800 +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -554,6 +560,12 @@ + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64el; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xloongarch64; then ++ HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 + + # The cpu defines below are for zero, we don't support them directly. 
+ elif test "x$OPENJDK_$1_CPU" = xsparc; then +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +--- a/make/CompileJavaModules.gmk 2024-01-10 05:19:49.000000000 +0800 ++++ b/make/CompileJavaModules.gmk 2024-01-30 10:00:11.614767768 +0800 +@@ -430,6 +430,7 @@ + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -437,6 +438,7 @@ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -456,6 +458,7 @@ + org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.loongarch64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +--- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp 2024-01-30 10:00:11.801432207 +0800 +@@ -52,3 +52,24 @@ + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2024-01-30 10:00:11.801432207 +0800 +@@ -1123,7 +1123,9 @@ + } + } + +- ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1663,6 +1665,10 @@ + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2024-01-30 10:00:11.801432207 +0800 +@@ -260,18 +260,29 @@ + __ store(reg, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); +- __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); +- __ cmp(condition, reg, reg1); ++ __ cmp_branch(condition, reg, reg1, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp +--- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp 2024-01-30 10:00:11.821431969 +0800 +@@ -84,3 +84,24 @@ + #endif // AARCH64 + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp 2024-01-30 10:00:11.818098676 +0800 +@@ -1150,6 +1150,9 @@ + __ b(*(op->label()), acond); + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -3082,6 +3085,10 @@ + __ bind(*stub->continuation()); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + #ifdef ASSERT + // emit run-time assertion + void LIR_Assembler::emit_assert(LIR_OpAssert* op) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp 2024-01-30 10:00:11.818098676 +0800 +@@ -423,18 +423,27 @@ + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, T_INT), FrameMap::LR_opr, info); +- __ cmp(condition, FrameMap::LR_opr, c); ++ __ cmp_branch(condition, FrameMap::LR_opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, type), FrameMap::LR_opr, info); +- __ cmp(condition, reg, FrameMap::LR_opr); ++ __ cmp_branch(condition, reg, FrameMap::LR_opr, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::java_frame_sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). 
++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,849 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, 
split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, 
scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { 
++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register 
index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,2827 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
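// Illustrative sketch (assumes simplified stand-in types, not the HotSpot Register/Address
// classes defined in this header): an Address here is just (base, index, scale, disp), and the
// effective address is base + (index << scale) + disp, which the st_w()/fld_s() style helpers
// earlier in this file materialize with add_d/alsl_d when the displacement does not fit an
// instruction's immediate field.
#include <cstdint>

struct ExampleAddress {
  uint64_t base;   // value held in the base register
  uint64_t index;  // value held in the index register (0 when no index is used)
  unsigned scale;  // log2 of the element size, i.e. times_1 .. times_8 above
  int32_t  disp;   // signed byte displacement
};

inline uint64_t effective_address(const ExampleAddress& a) {
  // base + (index << scale) + sign-extended displacement
  return a.base + (a.index << a.scale) + static_cast<int64_t>(a.disp);
}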
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
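// Illustrative sketch (uses plain integer register numbers rather than the HotSpot Register
// type): every LoongArch instruction is a fixed 32-bit word. For the 3R-type format used by
// insn_RRR() below, a 17-bit opcode sits in bits[31..15] and rk/rj/rd occupy three 5-bit
// fields, so Assembler::add_d(rd, rj, rk) emits insn_RRR(add_d_op, rk, rj, rd) via a single
// emit_int32().
#include <cassert>
#include <cstdint>

inline uint32_t encode_3r(uint32_t op17, uint32_t rk, uint32_t rj, uint32_t rd) {
  assert(op17 < (1u << 17) && rk < 32 && rj < 32 && rd < 32);
  return (op17 << 15) | (rk << 10) | (rj << 5) | rd;
}

// Example: add.d with rd=4, rj=5, rk=6, using add_d_op = 0b00000000000100001 from the ops17 table.
static const uint32_t add_d_example = encode_3r(0b00000000000100001u, 6, 5, 4);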
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 0b0000000100010100101001, ++ movgr2fr_d_op = 0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 
0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op = 0b0111001010011110001100, ++ vftint_l_d_op = 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 
0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op = 0b0111011010011110100011, ++ xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 
0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, ++ vbitclri_h_op = 0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 
0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 0b00111000001010000, ++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, 
++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ vsub_h_op = 0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ 
vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ vftintrm_w_d_op = 0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 
0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ xvmsub_w_op = 0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 
0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ xvrotri_d_op = 0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ 
xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 0b0010100110, ++ st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, 
++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 2RI1-type ++ // 31 11 10 9 5 4 0 ++ // | opcode | I1 | vj | rd | ++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } ++ ++ // 2RI2-type ++ // 31 12 11 10 9 5 4 0 ++ // | opcode | I2 | vj | rd | ++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } ++ ++ // 2RI3-type ++ // 31 13 12 10 9 5 4 0 ++ // | opcode | I3 | vj | vd | ++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } ++ ++ // 2RI4-type ++ // 31 14 13 10 9 5 4 0 ++ // | opcode | I4 | vj | vd | ++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } ++ ++ // 2RI5-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | I5 | vj | vd | ++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return 
(op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } ++ ++ // 2RI6-type ++ // 31 16 15 10 9 5 4 0 ++ // | opcode | I6 | vj | vd | ++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI7-type ++ // 31 17 16 10 9 5 4 0 ++ // | opcode | I7 | vj | vd | ++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI8-type ++ // 31 18 17 10 9 5 4 0 ++ // | opcode | I8 | rj | rd | ++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } ++ ++ // 2RI12-type ++ // 31 22 21 10 9 5 4 0 ++ // | opcode | I12 | rj | rd | ++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } ++ ++ ++ // 2RI14-type ++ // 31 24 23 10 9 5 4 0 ++ // | opcode | I14 | rj | rd | ++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } ++ ++ // 2RI16-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I16 | rj | rd | ++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } ++ ++ // 1RI13-type (?) ++ // 31 18 17 5 4 0 ++ // | opcode | I13 | vd | ++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } ++ ++ // 1RI20-type (?) ++ // 31 25 24 5 4 0 ++ // | opcode | I20 | rd | ++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } ++ ++ // 1RI21-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I21[15:0] | rj |I21[20:16]| ++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } ++ ++ // I26-type ++ // 31 26 25 10 9 0 ++ // | opcode | I26[15:0] | I26[25:16] | ++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } ++ ++ // imm15 ++ // 31 15 14 0 ++ // | opcode | I15 | ++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } ++ ++ ++ // get the offset field of beq, bne, blt[u], bge[u] instruction ++ int offset16(address entry) { ++ assert(is_simm16((entry - pc()) / 4), "change this code"); ++ if (!is_simm16((entry - pc()) / 4)) { ++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of beqz, bnez instruction ++ int offset21(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 21)) { ++ tty->print_cr("!!! 
is_simm21: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of b instruction ++ int offset26(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 26)) { ++ tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high16(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int split_low20(int x) { ++ return (x & 0xfffff); ++ } ++ ++ // Convert 20-bit x to a sign-extended 20-bit integer ++ static int simm20(int x) { ++ assert(x == (x & 0xFFFFF), "must be 20-bit only"); ++ return (x << 12) >> 12; ++ } ++ ++ static int split_low12(int x) { ++ return (x & 0xfff); ++ } ++ ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ ++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) { ++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; ++ si38 += (si38 & 0x20000) << 1; ++ si20 = si38 >> 18; ++ } ++ ++ // Convert 12-bit x to a sign-extended 12-bit integer ++ static int simm12(int x) { ++ assert(x == (x & 0xFFF), "must be 12-bit only"); ++ return (x << 20) >> 20; ++ } ++ ++ // Convert 26-bit x to a sign-extended 26-bit integer ++ static int simm26(int x) { ++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); ++ return (x << 6) >> 6; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12) { ++ //lu12i, ori ++ return (((x12 << 12) | x0) << 32) >> 32; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { ++ //lu32i, lu12i, ori ++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { ++ //lu52i, lu32i, lu12i, ori ++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. 
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_int32(int x) { ++ AbstractAssembler::emit_int32(x); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { 
emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ 
++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); }
++
++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); }
++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); }
++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); }
++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++
++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); }
++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); }
++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); }
++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); }
++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); }
++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); }
++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); }
++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); }
++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); }
++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); }
++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); }
++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); }
++
++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++
++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++
++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); }
++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); }
++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++
++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++
++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); }
++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); }
++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); }
++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); }
++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); }
++ void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); }
++
++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); }
++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++
++ void ld_b (Register rd, Address src);
++ void ld_bu (Register rd, Address src);
++ void ld_d (Register rd, Address src);
++ void ld_h (Register rd, Address src);
++ void ld_hu (Register rd, Address src);
++ void ll_w (Register rd, Address src);
++ void ll_d (Register rd, Address src);
++ void ld_wu (Register rd, Address src);
++ void ld_w (Register rd, Address src);
++ void st_b (Register rd, Address dst);
++ void st_d (Register rd, Address dst);
++ void st_w (Register rd, Address dst);
++ void sc_w (Register rd, Address dst);
++ void sc_d (Register rd, Address dst);
++ void st_h (Register rd, Address dst);
++ void fld_s (FloatRegister fd, Address src);
++ void fld_d (FloatRegister fd, Address src);
++ void fst_s (FloatRegister fd, Address dst);
++ void fst_d (FloatRegister fd, Address dst);
++
++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void dbar(int hint) {
++   assert(is_uimm(hint, 15), "not a unsigned 15-bit int");
++
++   if (os::is_ActiveCoresMP())
++     andi(R0, R0, 0);
++   else
++     emit_int32(insn_I15(dbar_op, hint));
++ }
++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); }
++
++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); }
++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); }
++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); }
++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); }
++
++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); }
++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); }
++
++
++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); }
++ void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); }
++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); }
++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); }
++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); }
++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); }
++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); }
++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); }
++ void b(address entry) { b(offset26(entry)); }
++ void bl(address entry) { bl(offset26(entry)); }
++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); }
++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); }
++
++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); }
++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); }
++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); }
++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); }
++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); }
++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); }
++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); }
++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); }
++ void b(Label& L) { b(target(L)); }
++ void bl(Label& L) { bl(target(L)); }
++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); }
++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); }
++
++ typedef enum {
++   // hint[4]
++   Completion = 0,
++   Ordering = (1 << 4),
++
++   // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation.
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, 
FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } 
++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX 
emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ 
void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX 
emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } 
++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX 
emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { 
ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, 
FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( 
vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ 
void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, 
(int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX 
assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)xd->encoding())); } ++ ++#undef ASSERT_LSX ++#undef ASSERT_LASX ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {} ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
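For readers skimming the LSX/LASX emitters above: each wrapper only packs an opcode plus the 5-bit register encodings into a 32-bit instruction word and hands it to emit_int32(). The standalone C++ sketch below illustrates that packing for the 3-register form, assuming the standard LoongArch 3R field layout (rd in bits 0-4, rj in 5-9, rk in 10-14, opcode above); the helper name and opcode value are illustrative only and are not the insn_RRR/opcode definitions from this patch.

#include <cstdint>
#include <cstdio>

// Illustrative packing of a 3-register (RRR) LoongArch instruction word.
// Field layout is assumed from the architecture's 3R format; the patch's
// real insn_RRR helper is defined elsewhere in assembler_loongarch.hpp.
static inline uint32_t pack_rrr(uint32_t op, int rk, int rj, int rd) {
  return (op << 15) | ((rk & 0x1f) << 10) | ((rj & 0x1f) << 5) | (rd & 0x1f);
}

int main() {
  const uint32_t fake_op = 0x0e3c1;  // hypothetical opcode, not a real encoding
  printf("0x%08x\n", pack_rrr(fake_op, /*rk=*/2, /*rj=*/1, /*rd=*/0));
  return 0;
}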
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use LoongArch, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since LoongArch CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { return *(u2*)p; } ++ static inline u4 get_native_u4(address p) { return *(u4*)p; } ++ static inline u8 get_native_u8(address p) { return *(u8*)p; } ++ ++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } ++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } ++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since LoongArch64 CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
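The Bytes helpers above exist because Java class-file and bytecode data are big-endian while LoongArch64 is little-endian, so each get_Java_* / put_Java_* accessor is an unaligned native access combined with a byte reversal. A minimal standalone sketch of that combination follows; it uses the GCC/Clang __builtin_bswap32 intrinsic, and the *_sketch names are illustrative stand-ins, not the patch's swap_u4/get_Java_u4 (the real swap_u4 comes from the OS_CPU inline header).

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative stand-ins for swap_u4 / get_Java_u4; not the patch's code.
static inline uint32_t swap_u4_sketch(uint32_t x) {
  return __builtin_bswap32(x);   // HotSpot's swap_u4 is compiler-dependent
}

static inline uint32_t get_Java_u4_sketch(const unsigned char* p) {
  uint32_t v;
  memcpy(&v, p, sizeof(v));      // unaligned-safe little-endian load
  return swap_u4_sketch(v);      // reverse bytes -> big-endian Java value
}

int main() {
  const unsigned char magic[] = { 0xCA, 0xFE, 0xBA, 0xBE };  // class-file magic, big-endian
  printf("0x%08X\n", get_Java_u4_sketch(magic));             // prints 0xCAFEBABE
  return 0;
}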
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ Metadata *m = _method->as_constant_ptr()->as_metadata(); ++ __ mov_metadata(SCR2, m); ++ ce->store_parameter(SCR2, 1); ++ ce->store_parameter(_bci, 0); ++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ move(SCR1, _index->as_register()); ++ } else { ++ __ li(SCR1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ assert(_array != NULL, "sanity"); ++ __ move(SCR2, _array->as_pointer_register()); ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ 
ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, ++ CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ _result = result; ++ _klass = klass; ++ _klass_reg = klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ __ move(A3, _klass_reg->as_register()); ++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++ : MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ 
ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (especially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that.
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++ } ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,354 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. ++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; 
++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap ++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ 
scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... 
| ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. ++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp 
2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp 1970-01-01 
08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,3387 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register 
as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ move(reg, R0); ++ } else { ++ int oop_index = __ oop_recorder()->find_index(o); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(reg, (long)o); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
++ return in_bytes(frame_map()->framesize_in_bytes()); ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in A0, and A1 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(A0); ++ ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ __ bind(_unwind_handler_entry); ++ __ verify_not_null_oop(V0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(S0, V0); // Preserve the exception ++ } ++ ++ // Perform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::a0_opr); ++ stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); ++ __ unlock_object(A5, A4, A0, *stub->entry()); ++ __ bind(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ mov_metadata(A1, method()->constant_encoding()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(A0, S0); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address 
handler_base = __ start_a_stub(deopt_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); ++ guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, ++ "word returns are in V0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(SCR1, SCR2, 0); ++ __ jr(RA); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(SCR1, SCR2, 0); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ move(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ case T_OBJECT: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ case T_FLOAT: ++ __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat()))); ++ __ fld_s(dest->as_float_reg(), SCR1, 0); ++ break; ++ case T_DOUBLE: ++ __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble()))); ++ __ 
fld_d(dest->as_double_reg(), SCR1, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (!c->as_jobject()) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ case T_INT: ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) ++ __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ li(SCR2, c->as_jint_bits()); ++ __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ li(SCR2, (intptr_t)c->as_jlong_bits()); ++ __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register 
t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ 
fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ assert(op->cond() == lir_cond_always, "must be"); ++#endif ++ ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++} ++ ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) 
_branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cmp_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = 
opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, 
Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. 
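A minimal illustrative sketch (plain C++, not part of the patch; ReceiverRow and the other names are hypothetical stand-ins for the ReceiverTypeData layout) of the two-pass row scan that type_profile_helper() above emits: bump the counter of a row already recording this receiver, otherwise claim the first empty row:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct ReceiverRow {            // hypothetical view of one receiver row
      const void* receiver;         // NULL while the row is unclaimed
      uint64_t    count;
    };

    static const uint64_t kCounterIncrement = 1;  // stands in for DataLayout::counter_increment

    static void profile_receiver(ReceiverRow* rows, size_t row_limit, const void* recv) {
      for (size_t i = 0; i < row_limit; i++) {    // pass 1: receiver already recorded?
        if (rows[i].receiver == recv) {
          rows[i].count += kCounterIncrement;
          return;                                 // corresponds to the __ b(*update_done)
        }
      }
      for (size_t i = 0; i < row_limit; i++) {    // pass 2: fill the first empty slot
        if (rows[i].receiver == NULL) {
          rows[i].receiver = recv;
          rows[i].count = kCounterIncrement;
          return;
        }
      }
      // all rows taken by other receivers: the emitted code simply falls through
    }

    int main() {
      ReceiverRow rows[2] = {};
      int a, b;
      profile_receiver(rows, 2, &a);
      profile_receiver(rows, 2, &a);
      profile_receiver(rows, 2, &b);
      printf("%llu %llu\n", (unsigned long long)rows[0].count,
                            (unsigned long long)rows[1].count);  // prints "2 1"
      return 0;
    }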
++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? &profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ 
check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr 
= op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ++ LIR_Opr result, BasicType type) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case 
lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? 
src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
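The constant power-of-two divisor cases of lir_div/lir_rem further below (and the 32-bit variants in arithmetic_idiv()) use the usual round-toward-zero shift-and-mask arithmetic; a minimal sketch of that arithmetic in plain C++ (not part of the patch; assumes arithmetic right shift of negative values, shown for the 64-bit case):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // x / (1 << shift), truncated toward zero: matches the srai/srli/add/srai sequence.
    static int64_t div_pow2(int64_t x, unsigned shift) {
      assert(shift > 0 && shift < 64);
      int64_t  sign = x >> 63;                          // srai: all ones if x < 0, else 0
      uint64_t bias = (uint64_t)sign >> (64 - shift);   // srli: (1<<shift)-1 for negatives, else 0
      return (x + (int64_t)bias) >> shift;              // add + srai
    }

    // x % (1 << shift), truncated toward zero: mask the magnitude, restore the sign.
    // The emitted code picks between the two forms without a branch.
    static int64_t rem_pow2(int64_t x, unsigned shift) {
      int64_t mask = (int64_t)(((uint64_t)1 << shift) - 1);
      return x >= 0 ? (x & mask) : -((-x) & mask);
    }

    int main() {
      printf("%lld %lld\n", (long long)div_pow2(-7, 1), (long long)rem_pow2(-7, 1));  // -3 -1
      return 0;
    }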
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2_long(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), ++ "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. 
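++  // The block below first makes sure the pc recorded for this throw is unique
++  // (see the JDK-8237483 note that follows), then binds label L and materializes
++  // its pc into exceptionPC with lipc so add_call_info() can attach the debug
++  // info used by the exception handler lookup to that exact pc.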
++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); ++ __ move(A4, j_rarg4); ++ assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); ++ __ move(A3, j_rarg3); ++ assert_different_registers(A2, j_rarg0, j_rarg1); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A1, j_rarg0); ++ __ move(A1, j_rarg1); ++ __ move(A0, j_rarg0); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ __ move(tmp, A0); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // tmp is -1^K where K == partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
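++  // Array klasses have negative layout helpers, so a layout helper that compares
++  // greater than or equal to Klass::_lh_neutral_value identifies a non-array
++  // (instance) klass and the copy is routed to the slow-path stub.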
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
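++        // Only the operand that is not statically known to be an object array is
++        // checked: its layout helper is compared against the objArray layout
++        // helper below, and any mismatch branches to the slow-path stub.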
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ load_klass(A4, dst); ++ assert_different_registers(A4, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); ++ __ move(tmp, A0); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case.
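++      // counter_addr is the generic CounterData::count_offset() slot set up
++      // above, so only the overall call count is bumped here.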
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
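++        // No type was known when this code was compiled: if the profile cell is
++        // still empty (or only has null_seen set) jump to 'none' and install this
++        // klass as the first observed type; otherwise another thread may have just
++        // published a klass, so re-read the cell after an acquire fence and retest
++        // before giving up and marking the entry as type_unknown.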
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, ++ CodeEmitInfo* info) { ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void 
LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() > Address::times_1) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ _call_aot_stub_size = 0, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 
+1,1396 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return 
opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, 
offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ return addr; ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, wo we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, ++ int disp, int c, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, ++ int disp, BasicType type, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp_branch(condition, reg, reg1, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = 
frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. ++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ 
left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. 
++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && 
Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, ++ LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (is_reference_type(type)) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), ++ new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ move(FrameMap::scr1_opr, result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = is_reference_type(type); ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); 
++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = 
state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. ++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions 
support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: ++ __ fmad(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ case vmIntrinsics::_fmaF: ++ __ fmaf(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ 
LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). ++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? 
state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. 
++ profile_branch(x, cond, left, right); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); ++ } else { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (!UseBarriersForVolatile) { ++ __ membar(); ++ } ++ __ volatile_load_mem_reg(address, result, info); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. 
++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || ++ index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced 
header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, 
t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry() { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
++ ++ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, 
++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,1138 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) { ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ 
move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ bool _return_state; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state=requires_return); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++}; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ jr(RA); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state) { ++ _sasm = sasm; ++ _return_state = return_state; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to be smart about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value.
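++// (Descriptive sketch, derived from save_live_registers() below: the save area
++// uses word offsets from the decremented SP, with words [0..31] holding
++// f0..f31 (only when save_fpu_registers) and words [32..59] holding r4..r31;
++// the zero register, ra, tp and sp are not saved.)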
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, 
target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. 
++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. 
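++  // (Descriptive note: the code below saves all live registers, calls 'target'
++  // in the VM and, when the runtime reports that the nmethod was deoptimized
++  // (the expected case), restores registers and tail-calls the deopt blob so
++  // the patched site is re-executed; otherwise it stops with "deopt not performed".)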
++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support 
for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 2 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 3 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. 
++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ 
call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ 
break; ++ ++ case monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +--- 
a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. 
++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ cbuf.set_insts_mark(); ++ __ patchable_jump(__ pc()); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/copy_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. 
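++// (It copies forward when 'from' lies above 'to' and backward otherwise, so
++// overlapping source and destination ranges are handled; each element is moved
++// with a single T-sized assignment.)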
++template <class T> ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_loongarch.hpp" ++ ++// Nothing to do on LoongArch +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation.
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++ ++// Nothing to do on LoongArch ++ ++#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/frame_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (java_frame_return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. 
adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[java_frame_return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(java_frame_sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[java_frame_link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::java_frame_sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::java_frame_sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
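(Illustrative aside, not part of the patch.) The compiled/runtime branch a few lines up rebuilds the sender frame from three loads that do not rely on fp at all: the sender's sp is the callee's unextended sp plus the code blob's frame size, the return pc is the word just below that sp, and the saved fp sits two words below it (java_frame_sender_sp_offset is 2 in frame_loongarch.hpp later in this patch). A minimal standalone model, with invented names such as RawFrame and sender_of_compiled:

#include <cstdint>

// Illustrative model only, not the HotSpot API.
struct RawFrame {
  intptr_t* sp;   // sender's stack pointer
  intptr_t* fp;   // sender's saved frame pointer
  intptr_t  pc;   // sender's return address
};

// frame_size_in_words is the callee's frame size as recorded in its code blob.
inline RawFrame sender_of_compiled(intptr_t* callee_unextended_sp,
                                   int frame_size_in_words) {
  intptr_t* sender_sp = callee_unextended_sp + frame_size_in_words;
  RawFrame sender;
  sender.sp = sender_sp;
  sender.pc = *(sender_sp - 1);              // return address: one word below sender_sp
  sender.fp = (intptr_t*)*(sender_sp - 2);   // saved fp: java_frame_sender_sp_offset == 2
  return sender;
}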
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[java_frame_return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
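(Illustrative aside, not part of the patch.) Every *_safe flag used throughout safe_for_sender reduces to an interval test against a downward-growing stack whose usable range excludes the guard pages. A simplified standalone sketch, assuming an invented StackModel type and collapsing the red/yellow zones into a single usable_size:

#include <cstddef>
#include <cstdint>

struct StackModel {
  uintptr_t stack_base;   // highest address of the thread's stack (exclusive)
  size_t    usable_size;  // stack size minus the guard zones
};

// sp must lie inside the usable part of the stack.
inline bool sp_safe(const StackModel& s, uintptr_t sp) {
  return sp < s.stack_base && sp >= s.stack_base - s.usable_size;
}

// unextended sp must be inside the stack and at or above sp.
inline bool unextended_sp_safe(const StackModel& s, uintptr_t unextended_sp, uintptr_t sp) {
  return unextended_sp < s.stack_base && unextended_sp >= sp;
}

// fp must be inside the stack, strictly above sp, and leave room for the
// return-address slot that sits java_frame_return_addr_offset words above it.
inline bool fp_safe(const StackModel& s, uintptr_t fp, uintptr_t sp, size_t ret_slot_bytes) {
  return fp < s.stack_base && fp > sp && fp + ret_slot_bytes < s.stack_base;
}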
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::java_frame_sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = safe_interpreter_frame_method(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/frame_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// Low ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// High <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++// ++// ------------------------------ Native (C frame) --------------------------------------- ++// Layout of C frame: ++// High ++// | ++// - <----- fp <- sender sp ++// fp -8 | [ra] = sender_pc() ++// fp-16 | [fp (sender)] = link() ++// | [...] 
++// | ++// - <----- sp ++// | ++// v ++// Low ++// ------------------------------ Native (C frame) --------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ ++ // Java frames ++ java_frame_link_offset = 0, ++ java_frame_return_addr_offset = 1, ++ java_frame_sender_sp_offset = 2, ++ ++ // Native frames ++ native_frame_link_offset = -2, ++ native_frame_return_addr_offset = -1, ++ native_frame_sender_sp_offset = 0, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
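(Illustrative aside, not part of the patch.) The enum above expresses every Java- and interpreter-frame slot as a signed word offset from fp, and the shared addr_at() helper simply indexes fp by that offset. A tiny standalone model of the address arithmetic, using placeholder constants copied from the enum:

#include <cstdint>
#include <cstdio>

// Word offsets from fp, mirroring the enum above (Java frames).
enum FrameOffsets {
  kLinkOffset       =  0,   // saved fp of the sender
  kReturnAddrOffset =  1,   // return pc
  kSenderSpOffset   = -1,   // interpreter_frame_sender_sp_offset
  kLastSpOffset     = -2,   // interpreter_frame_last_sp_offset
  kLocalsOffset     = -3    // interpreter_frame_locals_offset
};

// addr_at(offset) is fp-relative word indexing, as in the shared frame code.
inline intptr_t* slot_addr(intptr_t* fp, int word_offset) {
  return fp + word_offset;
}

int main() {
  intptr_t fake_stack[16] = {};
  intptr_t* fp = fake_stack + 8;   // pretend frame pointer into a fake frame
  std::printf("return pc slot : %p\n", (void*)slot_addr(fp, kReturnAddrOffset));
  std::printf("sender sp slot : %p\n", (void*)slot_addr(fp, kSenderSpOffset));
  return 0;
}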
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { ++ if (is_java_frame()) ++ return (intptr_t*) *(intptr_t **)addr_at(java_frame_link_offset); ++ return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) ++ : (intptr_t **)addr_at(native_frame_link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { ++ if (is_java_frame()) ++ return (address*) addr_at(java_frame_return_addr_offset); ++ return (address*) addr_at(native_frame_return_addr_offset); ++} ++ ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { ++ if (is_java_frame()) ++ return addr_at(java_frame_sender_sp_offset); ++ return addr_at(native_frame_sender_sp_offset); ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** 
frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,523 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ ++ __ beqz(AT, filtered); ++ ++ __ push(saved_regs); ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop(saved_regs); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) { ++ __ push(saved_regs); ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop(saved_regs); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ __ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -1 * wordSize); ++ __ st_d(tmp, index); ++ __ ld_d(AT, buffer); ++ ++ // Record the previous value ++ __ stx_d(pre_val, tmp, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(AT, done); ++ ++ // crosses regions, storing NULL? ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ srli_d(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ add_d(card_addr, card_addr, cardtable); ++ ++ __ ld_bu(AT, card_addr, 0); ++ __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beqz(AT, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ld_bu(AT, card_addr, 0); ++ __ beqz(AT, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ st_b(R0, card_addr, 0); ++ ++ __ ld_d(AT, queue_index); ++ __ beqz(AT, runtime); ++ __ addi_d(AT, AT, -1 * wordSize); ++ __ st_d(AT, queue_index); ++ __ ld_d(tmp2, buffer); ++ __ ld_d(AT, queue_index); ++ __ stx_d(card_addr, tmp2, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
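(Illustrative aside, not part of the patch.) The two assembler barriers above, g1_write_barrier_pre and g1_write_barrier_post, encode the usual G1 SATB-enqueue and dirty-card decisions; the C1 stubs that follow reuse the same logic. The standalone sketch below models those decisions in plain C++. SatbQueue, the 512-byte card size and the young-card value are assumptions chosen for illustration, and queue handling is reduced to a boolean meaning "take the runtime slow path":

#include <cstddef>
#include <cstdint>

// --- SATB pre-barrier: record the previous value while marking is active. ---
struct SatbQueue {
  bool      active;   // is concurrent marking in progress?
  size_t    index;    // byte index of the next free slot, counted down; 0 means full
  intptr_t* buffer;   // thread-local SATB buffer
};

// Returns true when the runtime slow path must be taken for pre_val.
inline bool satb_pre_barrier(SatbQueue& q, intptr_t pre_val) {
  if (!q.active)    return false;  // marking inactive: nothing to record
  if (pre_val == 0) return false;  // previous value was NULL: nothing to record
  if (q.index == 0) return true;   // buffer full: hand off to the runtime
  q.index -= sizeof(intptr_t);     // step the index down one word and
  q.buffer[q.index / sizeof(intptr_t)] = pre_val;  // store the previous value
  return false;
}

// --- Post-barrier: dirty the card for a region-crossing, non-NULL store. ---
const int     kCardShift = 9;      // assumption: 512-byte cards
const uint8_t kDirtyCard = 0;      // the code above asserts dirty_card_val() == 0
const uint8_t kYoungCard = 2;      // placeholder for g1_young_card_val()

// Returns true when the card must be enqueued on the dirty-card queue.
inline bool g1_post_barrier(uint8_t* byte_map_base, int log_region_size,
                            uintptr_t store_addr, uintptr_t new_val) {
  if (((store_addr ^ new_val) >> log_region_size) == 0) return false;  // same region
  if (new_val == 0) return false;                                      // stored NULL
  uint8_t* card = byte_map_base + (store_addr >> kCardShift);
  if (*card == kYoungCard) return false;   // young regions need no refinement
  // (the real code re-reads the card after a StoreLoad fence here)
  if (*card == kDirtyCard) return false;   // already dirty: nothing to do
  *card = kDirtyCard;                      // dirty the card, then enqueue it
  return true;
}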
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ __ load_parameter(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ popad(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address ++ Address store_addr(FP, 2 * BytesPerWord); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ __ load_parameter(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ popad(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,255 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. ++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ld_wu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ ld_bu (dst, src); break; ++ case T_BYTE: __ ld_b (dst, src); break; ++ case T_CHAR: __ ld_hu (dst, src); break; ++ case T_SHORT: __ ld_h (dst, src); break; ++ case T_INT: __ ld_w (dst, src); break; ++ case T_LONG: __ ld_d (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ fld_s(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ fld_d(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ st_w(R0, dst); ++ } else { ++ __ st_d(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ st_w(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ st_b(val, dst); ++ break; ++ case T_BYTE: ++ __ st_b(val, dst); ++ break; ++ case T_SHORT: ++ __ st_h(val, dst); ++ break; ++ case T_CHAR: ++ __ st_h(val, dst); ++ break; ++ case T_INT: ++ __ st_w(val, dst); ++ break; ++ case T_LONG: ++ __ st_d(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ fst_s(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ fst_d(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, 
Address(obj, 0));
++}
++
++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
++                                        Register var_size_in_bytes,
++                                        int con_size_in_bytes,
++                                        Register t1,
++                                        Register t2,
++                                        Label& slow_case) {
++  assert_different_registers(obj, t2);
++  assert_different_registers(obj, var_size_in_bytes);
++  Register end = t2;
++
++  // verify_tlab();
++
++  __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset()));
++  if (var_size_in_bytes == noreg) {
++    __ lea(end, Address(obj, con_size_in_bytes));
++  } else {
++    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0));
++  }
++  __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset()));
++  __ blt_far(SCR1, end, slow_case, false);
++
++  // update the tlab top pointer
++  __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset()));
++
++  // recover var_size_in_bytes if necessary
++  if (var_size_in_bytes == end) {
++    __ sub_d(var_size_in_bytes, var_size_in_bytes, obj);
++  }
++  // verify_tlab();
++}
++
++// Defines obj, preserves var_size_in_bytes
++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj,
++                                        Register var_size_in_bytes,
++                                        int con_size_in_bytes,
++                                        Register t1,
++                                        Label& slow_case) {
++  assert_different_registers(obj, var_size_in_bytes, t1);
++  if (!Universe::heap()->supports_inline_contig_alloc()) {
++    __ b_far(slow_case);
++  } else {
++    Register end = t1;
++    Register heap_end = SCR2;
++    Label retry;
++    __ bind(retry);
++
++    __ li(SCR1, (address)Universe::heap()->end_addr());
++    __ ld_d(heap_end, SCR1, 0);
++
++    // Get the current top of the heap
++    __ li(SCR1, (address) Universe::heap()->top_addr());
++    __ ll_d(obj, SCR1, 0);
++
++    // Adjust it by the size of our new object
++    if (var_size_in_bytes == noreg)
++      __ addi_d(end, obj, con_size_in_bytes);
++    else
++      __ add_d(end, obj, var_size_in_bytes);
++
++    // if end < obj then we wrapped around high memory
++    __ blt_far(end, obj, slow_case, false);
++    __ blt_far(heap_end, end, slow_case, false);
++
++    // If heap top hasn't been changed by some other thread, update it.
++    __ sc_d(end, SCR1, 0);
++    __ beqz(end, retry);
++
++    incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1);
++  }
++}
++
++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
++                                               Register var_size_in_bytes,
++                                               int con_size_in_bytes,
++                                               Register t1) {
++  assert(t1->is_valid(), "need temp reg");
++
++  __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset())));
++  if (var_size_in_bytes->is_valid())
++    __ add_d(t1, t1, var_size_in_bytes);
++  else
++    __ addi_d(t1, t1, con_size_in_bytes);
++  __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset())));
++}
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp
+--- a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800
++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800
+@@ -0,0 +1,88 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++
++#include "asm/macroAssembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "memory/allocation.hpp"
++#include "oops/access.hpp"
++
++class InterpreterMacroAssembler;
++
++class BarrierSetAssembler: public CHeapObj<mtGC> {
++private:
++  void incr_allocated_bytes(MacroAssembler* masm,
++                            Register var_size_in_bytes,
++                            int con_size_in_bytes,
++                            Register t1);
++
++public:
++  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register dst, Register count, RegSet saved_regs) {}
++  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register dst, Register count, Register scratch, RegSet saved_regs) {}
++
++  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                       Register dst, Address src, Register tmp1, Register tmp_thread);
++  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                        Address dst, Register val, Register tmp1, Register tmp2);
++
++
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Register obj2);
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Address obj2);
++
++  virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) {
++    // Default implementation does not need to do anything.
++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,140 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T4 RT4 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8)
++
++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
++                                                                    Register addr, Register count, Register tmp,
++                                                                    RegSet saved_regs) {
++  BarrierSet *bs = BarrierSet::barrier_set();
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++  intptr_t disp = (intptr_t) ct->byte_map_base();
++
++  Label L_loop, L_done;
++  const Register end = count;
++  assert_different_registers(addr, end);
++
++  __ beq(count, R0, L_done); // zero count - nothing to do
++
++  if (ct->scanned_concurrently()) __ membar(__ StoreStore);
++
++  __ li(tmp, disp);
++
++  __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
++  __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive
++  __ shr(addr, CardTable::card_shift);
++  __ shr(end, CardTable::card_shift);
++  __ sub_d(end, end, addr); // end --> cards count
++
++  __ add_d(addr, addr, tmp);
++
++  __ BIND(L_loop);
++  __ stx_b(R0, addr, count);
++  __ addi_d(count, count, -1);
++  __ bge(count, R0, L_loop);
++
++  __ BIND(L_done);
++}
++
++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {
++  // Does a store check for the oop in register obj. The content of
++  // register obj is destroyed afterwards.
++  BarrierSet* bs = BarrierSet::barrier_set();
++
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++
++  __ shr(obj, CardTable::card_shift);
++
++  Address card_addr;
++
++  intptr_t byte_map_base = (intptr_t)ct->byte_map_base();
++  Register tmp = T4;
++  assert_different_registers(tmp, obj);
++  __ li(tmp, byte_map_base);
++  __ add_d(tmp, tmp, obj);
++
++  assert(CardTable::dirty_card_val() == 0, "must be");
++
++  jbyte dirty = CardTable::dirty_card_val();
++  if (UseCondCardMark) {
++    Label L_already_dirty;
++    __ membar(__ StoreLoad);
++    __ ld_b(AT, tmp, 0);
++    __ addi_d(AT, AT, -1 * dirty);
++    __ beq(AT, R0, L_already_dirty);
++    __ st_b(R0, tmp, 0);
++    __ bind(L_already_dirty);
++  } else {
++    if (ct->scanned_concurrently()) {
++      __ membar(Assembler::StoreStore);
++    }
++    __ st_b(R0, tmp, 0);
++  }
++}
++
++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                                                Address dst, Register val, Register tmp1, Register tmp2) {
++  bool in_heap = (decorators & IN_HEAP) != 0;
++
++  bool is_array = (decorators & IS_ARRAY) != 0;
++  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
++  bool precise = is_array || on_anonymous;
++
++  bool needs_post_barrier = val != noreg && in_heap;
++
++  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);
++  if (needs_post_barrier) {
++    // flatten object address if needed
++    if (!precise || (dst.index() == noreg && dst.disp() == 0)) {
++      store_check(masm, dst.base(), dst);
++    } else {
++      __ lea(tmp1, dst);
++      store_check(masm, tmp1, dst);
++    }
++  }
++}
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp
+--- a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800
++++ 
b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). 
The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. ++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++// FIXME: LA ++// This makes the games we play when patching difficult, so when we ++// come across an access that needs patching we deoptimize. 
There are ++// ways we can avoid this, but these would slow down C1-compiled code ++// in the default case. We could revisit this decision if we get any ++// evidence that it's worth doing. ++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/globals_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/icache_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, A2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/icache_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. 
this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,2043 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T4; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. 
++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ li(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++  alsl_d(AT, index, cache, Address::times_ptr - 1);
++  ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
++  if(os::is_MP()) {
++    membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore));
++  }
++
++  const int shift_count = (1 + byte_no) * BitsPerByte;
++  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++         "correct shift count");
++  srli_d(bytecode, bytecode, shift_count);
++  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++  li(AT, ConstantPoolCacheEntry::bytecode_1_mask);
++  andr(bytecode, bytecode, AT);
++}
++
++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
++                                                               Register tmp,
++                                                               int bcp_offset,
++                                                               size_t index_size) {
++  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
++  assert(cache != tmp, "must use different register");
++  get_cache_index_at_bcp(tmp, bcp_offset, index_size);
++  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
++  // convert from field index to ConstantPoolCacheEntry index
++  // and from word offset to byte offset
++  assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
++  shl(tmp, 2 + LogBytesPerWord);
++  ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize);
++  // skip past the header
++  addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
++  add_d(cache, cache, tmp);
++}
++
++void InterpreterMacroAssembler::get_method_counters(Register method,
++                                                    Register mcs, Label& skip) {
++  Label has_counters;
++  ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++  bne(mcs, R0, has_counters);
++  call_VM(noreg, CAST_FROM_FN_PTR(address,
++          InterpreterRuntime::build_method_counters), method);
++  ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++  beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory
++  bind(has_counters);
++}
++
++// Load object from cpool->resolved_references(index)
++void InterpreterMacroAssembler::load_resolved_reference_at_index(
++                                 Register result, Register index, Register tmp) {
++  assert_different_registers(result, index);
++  // convert from field index to resolved_references() index and from
++  // word index to byte offset. Since this is a java object, it can be compressed
++  shl(index, LogBytesPerHeapOop);
++
++  get_constant_pool(result);
++  // load pointer for resolved_references[] objArray
++  ld_d(result, result, ConstantPool::cache_offset_in_bytes());
++  ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes());
++  resolve_oop_handle(result, tmp);
++  // Add in the index
++  add_d(result, result, index);
++  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp);
++}
++
++// load cpool->resolved_klass_at(index)
++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool,
++                                                             Register index, Register klass) {
++  alsl_d(AT, index, cpool, Address::times_ptr - 1);
++  ld_h(index, AT, sizeof(ConstantPool));
++  Register resolved_klasses = cpool;
++  ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes()));
++  alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1);
++  ld_d(klass, AT, Array<Klass*>::base_offset_in_bytes());
++}
++
++// Resets LVP to locals.
Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void 
InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(temp); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::java_frame_link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld_d(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. 
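++    // For example, with state == ftos the target is loaded from
++    // S8 + (ftos - itos) * table_size + (Rnext << LogBytesPerWord);
++    // table_offset may not fit in a simm12, hence the two paths below.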
++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, S8, T2); ++ } ++ } else { ++ li(T3, (long)table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T2, T3); ++ } ++ jr(T3); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T3, T2); ++ jr(T3); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. 
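++  // The monitor area grows down from interpreter_frame_initial_sp_offset, so the
++  // method's BasicObjectLock sits one sizeof(BasicObjectLock) below that boundary.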
++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think LA do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. 
Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, TSR, AT); ++ bge(R0, AT, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld_d(ret_addr, FP, frame::java_frame_return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displaced header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks.
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
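++  // Recompute the expected bcp: constMethod + ConstMethod::codes_offset() + the bci
++  // stored in the DataLayout header; on a mismatch fall through to the
++  // InterpreterRuntime::verify_mdp call.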
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld_d(AT, data); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ st_d(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ st_d(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ addi_d(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ add_d(bumped_count, bumped_count, AT); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. 
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) 
++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ b(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. 
++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ b(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
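++  // T_BOOLEAN keeps only bit 0, T_BYTE and T_SHORT are sign-extended,
++  // T_CHAR is zero-extended to 16 bits and T_INT is left as is.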
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
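++        // tmp holds a cell count; convert it to a byte offset (cells are
++        // DataLayout::cell_size bytes) before advancing mdp below.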
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ add_d(mdp, mdp, tmp); ++ } ++ st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // beginning of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't know its ++ // length ++ Label do_profile; ++ ld_b(tmp, _bcp_register, 0); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beqz(AT, do_profile); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beqz(AT, do_profile); ++ ++ get_method(tmp); ++ ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ add_d(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch!"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ blt(tmp1, R0, profile_continue); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down.
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ slli_d(AT, tmp2, Interpreter::logStackElementSize); ++ ldx_d(tmp2, AT, _locals_register); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ li(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,281 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, 
int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // 
CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,273 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 
1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + A0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
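The pass_* routines above (and pass_double, which follows) all apply the same placement policy: integer-like arguments go into the argument registers after A0, which stays reserved for the JNIEnv pointer; floating-point arguments prefer the FP argument registers and then fall back to the remaining integer registers; anything left spills to the native stack in wordSize steps. A minimal stand-alone sketch of that policy is below; it models only the placement decision, not the emitted LoongArch code, and the register counts (8 integer and 8 FP argument registers) are assumptions standing in for Argument::n_register_parameters and Argument::n_float_register_parameters.

  // Conceptual model of the argument-placement decisions made by the
  // SignatureHandlerGenerator above; register counts are assumed values.
  #include <cstdio>

  enum class Slot { IntReg, FpReg, Stack };

  struct ArgState {
    int num_int_args = 0;   // A0 is reserved for JNIEnv, so integer args start at A1
    int num_fp_args  = 0;
    int stack_offset = 0;   // grows by wordSize (8 bytes) per spilled argument
  };

  // Where does the next integer-like argument (int/long/object) go?
  Slot place_int(ArgState& s) {
    const int n_int_regs = 8;               // assumed Argument::n_register_parameters
    if (s.num_int_args < n_int_regs - 1) {  // "- 1" keeps A0 free for the JNIEnv pointer
      s.num_int_args++;
      return Slot::IntReg;
    }
    s.stack_offset += 8;
    return Slot::Stack;
  }

  // Floats/doubles prefer FP registers, then the leftover integer registers,
  // then the stack -- the same order as pass_float()/pass_double() above.
  Slot place_fp(ArgState& s) {
    const int n_fp_regs = 8;                // assumed Argument::n_float_register_parameters
    if (s.num_fp_args < n_fp_regs) {
      s.num_fp_args++;
      return Slot::FpReg;
    }
    return place_int(s);
  }

  int main() {
    ArgState s;
    for (int i = 0; i < 10; i++) {
      std::printf("fp arg %d -> slot kind %d\n", i, (int)place_fp(s));
    }
  }
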
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
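The note above ("included in the middle of class Interpreter... do not include files here") describes the splice-in pattern used for per-CPU interpreter pieces: a shared header opens the class and textually includes the platform fragment inside the class body, so the fragment may contain only bare member declarations. A simplified illustration of the pattern follows; the file and class names in it are invented, only the shape matches.

  // Simplified illustration of the include-inside-a-class pattern (invented names).
  //
  // circle_members.inc -- bare member declarations only, with no #include
  // directives of its own:
  //
  //   double radius;
  //   double area() const { return 3.141592653589793 * radius * radius; }
  //
  // shape.hpp -- the shared header splices the fragment into the class body:
  //
  //   class Shape {
  //    public:
  //   #include "circle_members.inc"   // variant-specific members land here
  //   };
  //
  // Any #include inside circle_members.inc would be expanded in the middle of
  // class Shape and fail to compile, which is why the comment above forbids
  // includes in this header.
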
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,166 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ li(AT, (long)counter_addr); ++ __ ld_w(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ ++ __ move(T0, A1); ++ // Both T0 and T4 are clobbered by try_resolve_jobject_in_native. 
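The generator continues below with the barrier call that the comment above refers to. For orientation, the fast path it emits follows a seqlock-style protocol: read the safepoint counter, bail out to the slow JNI accessor if the counter is odd, speculatively load the field, then re-read the counter and fall back to the slow path if it changed (the comment near the top of this file notes that the two counter loads are ordered by a data dependency rather than a fence). A stand-alone C++ sketch of that protocol is below, with invented names and std::atomic standing in for the hand-built ordering; it is an illustration, not the emitted stub.

  // Illustration only: the fast/slow shape of the generated accessor, using
  // std::atomic instead of the data-dependency trick in the real stub.
  #include <atomic>
  #include <cstdio>

  static std::atomic<int> safepoint_counter{0};       // stands in for the safepoint counter

  static int slow_get_int_field(const int* field) {   // stands in for the slow JNI accessor
    return *field;
  }

  static int fast_get_int_field(const int* field) {
    int c1 = safepoint_counter.load(std::memory_order_acquire);
    if (c1 & 1) return slow_get_int_field(field);     // safepoint in progress: take the slow path
    int v = *field;                                   // speculative load of the field
    int c2 = safepoint_counter.load(std::memory_order_acquire);
    if (c1 != c2) return slow_get_int_field(field);   // counter changed: retry via the slow path
    return v;                                         // counter unchanged: the load is valid
  }

  int main() {
    int field = 42;
    std::printf("%d\n", fast_get_int_field(&field));
    return 0;
  }
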
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T4, slow); ++ ++ __ srli_d(T2, A2, 2); // offset ++ __ add_d(T0, T0, T2); ++ ++ __ li(AT, (long)counter_addr); ++ __ ld_w(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; ++ case T_BYTE: __ ld_b (V0, T0, 0); break; ++ case T_CHAR: __ ld_hu (V0, T0, 0); break; ++ case T_SHORT: __ ld_h (V0, T0, 0); break; ++ case T_INT: __ ld_w (V0, T0, 0); break; ++ case T_LONG: __ ld_d (V0, T0, 0); break; ++ case T_FLOAT: __ fld_s (F0, T0, 0); break; ++ case T_DOUBLE: __ fld_d (F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In LOOGNARCH64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); ++ jobject value = JNIHandles::make_local(obj()); ++ if (HotSpotObjectConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (HotSpotMetaspaceConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) narrowOop); ++ TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) reference); ++ TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { ++ method = getMethodFromHotSpotMethod(hotspot_method()); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), 
virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad +--- a/src/hotspot/cpu/loongarch/loongarch_64.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/loongarch_64.ad 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,13917 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def 
FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. ++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( 
SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, 
F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def 
F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, 
F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( 
SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 
); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); ++ ++reg_class all_reg32( ++ S8, ++ S7, ++ S5, /* S5_heapbase */ ++ /* S6, S6 TREG */ ++ S4, ++ S3, ++ S2, ++ S1, ++ S0, ++ T8, ++ /* T7, AT */ ++ T6, ++ T5, ++ /* T4, jarl T4 */ ++ T3, ++ T2, ++ T1, ++ T0, ++ A7, ++ A6, ++ A5, ++ A4, ++ A3, ++ A2, ++ A1, ++ A0 ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class all_reg( ++ S8, S8_H, ++ S7, S7_H, ++ /* S6, S6_H, S6 TREG */ ++ S5, S5_H, /* S5_heapbase */ ++ S4, S4_H, ++ S3, S3_H, ++ S2, S2_H, ++ S1, S1_H, ++ S0, S0_H, ++ T8, T8_H, ++ /* T7, T7_H, AT */ ++ T6, T6_H, ++ T5, T5_H, ++ /* T4, T4_H, jalr T4 */ ++ T3, T3_H, ++ T2, T2_H, ++ T1, T1_H, ++ T0, T0_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H ++ ); ++ ++ ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++ ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK---------------------------------------------------
++// Define name --> value mappings to inform the ADLC of an integer valued name
++// Current support includes integer values in the range [0, 0x7FFFFFFF]
++// Format:
++// int_def <name> ( <int_value>, <expression> );
++// Generated Code in ad_<arch>.hpp
++// #define <name> (<expression>)
++// // value == <int_value>
++// Generated code in ad_<arch>.cpp adlc_verification()
++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
++//
++definitions %{
++ int_def DEFAULT_COST ( 100, 100);
++ int_def HUGE_COST (1000000, 1000000);
++
++ // Memory refs are twice as expensive as run-of-the-mill.
++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
++
++ // Branches are even more expensive.
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
++ // we use jr instruction to construct call, so more expensive
++ int_def CALL_COST ( 500, DEFAULT_COST * 5);
++/*
++ int_def EQUAL ( 1, 1 );
++ int_def NOT_EQUAL ( 2, 2 );
++ int_def GREATER ( 3, 3 );
++ int_def GREATER_EQUAL ( 4, 4 );
++ int_def LESS ( 5, 5 );
++ int_def LESS_EQUAL ( 6, 6 );
++*/
++%}
++
++
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description
++
++source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++extern RegMask _ANY_REG32_mask;
++extern RegMask _ANY_REG_mask;
++extern RegMask _PTR_REG_mask;
++
++class CallStubImpl {
++
++ //--------------------------------------------------------------
++ //---< Used for optimization in Compile::shorten_branches >---
++ //--------------------------------------------------------------
++
++ public:
++ // Size of call trampoline stub.
++ static uint size_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++
++ // number of relocations needed by a call trampoline stub
++ static uint reloc_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++};
++
++class HandlerImpl {
++
++ public:
++
++ static int emit_exception_handler(CodeBuffer &cbuf);
++ static int emit_deopt_handler(CodeBuffer& cbuf);
++
++ static uint size_exception_handler() {
++ // NativeCall instruction size is the same as NativeJump.
++ // exception handler starts out as jump and can be patched to
++ // a call by deoptimization. (4932387)
++ // Note that this value is also credited (in output.cpp) to
++ // the size of the code section.
++ int size = NativeFarCall::instruction_size;
++ const uintx m = 16 - 1;
++ return mask_bits(size + m, ~m);
++ //return round_to(size, 16);
++ }
++
++ static uint size_deopt_handler() {
++ int size = NativeFarCall::instruction_size;
++ const uintx m = 16 - 1;
++ return mask_bits(size + m, ~m);
++ //return round_to(size, 16);
++ }
++};
++
++bool is_CAS(int opcode);
++bool use_AMO(int opcode);
++
++bool unnecessary_acquire(const Node *barrier);
++bool unnecessary_release(const Node *barrier);
++bool unnecessary_volatile(const Node *barrier);
++bool needs_releasing_store(const Node *store);
++
++%} // end source_hpp
++
++source %{
++
++#define NO_INDEX 0
++#define RELOC_IMM64 Assembler::imm_operand
++#define RELOC_DISP32 Assembler::disp32_operand
++
++#define V0_num A0_num
++#define V0_H_num A0_H_num
++
++#define __ _masm.
++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. 
++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// LoongArch supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// LoongArch doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int 
CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). 
++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = 
ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_w(AT, Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ 
int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ if (this->ideal_reg() == Op_RegI) ++ st->print("\tld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("\tld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\tmove(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ 
MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), 
as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld_d AT, poll_offset[thread] #polling_page_address\n\t" ++ "ld_w AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ ld_d(RA, Address(SP, framesize - wordSize)); ++ __ ld_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize, 12)) { ++ __ addi_d(SP, SP, framesize); ++ } else { ++ __ li(AT, framesize); ++ __ add_d(SP, SP, AT); ++ } ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if 
(SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_d(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } else { ++ __ li(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const 
RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask();
++
++int Compile::ConstantTable::calculate_table_base_offset() const {
++ return 0; // absolute addressing, no offset
++}
++
++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
++ ShouldNotReachHere();
++}
++
++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
++ Compile* C = ra_->C;
++ Compile::ConstantTable& constant_table = C->constant_table();
++ MacroAssembler _masm(&cbuf);
++
++ Register Rtoc = as_Register(ra_->get_encode(this));
++ CodeSection* consts_section = cbuf.consts();
++ int consts_size = consts_section->align_at_start(consts_section->size());
++ assert(constant_table.size() == consts_size, "must be equal");
++
++ if (consts_section->size()) {
++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS,
++ "insts must immediately follow consts");
++ // Materialize the constant table base.
++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset());
++ jint offs = (baseaddr - __ pc()) >> 2;
++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset");
++ __ pcaddi(Rtoc, offs);
++ }
++}
++
++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
++ // pcaddi
++ return 1 * BytesPerInstWord;
++}
++
++#ifndef PRODUCT
++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
++ Register r = as_Register(ra_->get_encode(this));
++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name());
++}
++#endif
++
++
++//=============================================================================
++#ifndef PRODUCT
++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
++ Compile* C = ra_->C;
++
++ int framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
++
++ // Calls to C2R adapters often do not accept exceptional returns.
++ // We require that their callers must bang for them. But be careful, because
++ // some VM calls (such as call site linkage) can use several kilobytes of
++ // stack. But the stack safety zone should account for that.
++ // See bugs 4446381, 4468289, 4497237.
++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ st->print("addi_d FP, SP, -%d \n\t", wordSize*2); ++ st->print("addi_d SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ if (Assembler::is_simm(-framesize, 12)) { ++ __ addi_d(SP, SP, -framesize); ++ } else { ++ __ li(AT, -framesize); ++ __ add_d(SP, SP, AT); ++ } ++ __ st_d(RA, Address(SP, framesize - wordSize)); ++ __ st_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize - wordSize * 2, 12)) { ++ __ addi_d(FP, SP, framesize - wordSize * 2); ++ } else { ++ __ li(AT, framesize - wordSize * 2); ++ __ add_d(FP, SP, AT); ++ } ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool use_AMO(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_StoreI: ++ case Op_StoreL: ++ case Op_StoreP: ++ case Op_StoreN: ++ case Op_StoreNKlass: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_acquire(const Node *barrier) ++{ ++ assert(barrier->is_MemBar(), "expecting a membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode* mb = barrier->as_MemBar(); ++ ++ if (mb->trailing_load_store()) { ++ Node* load_store = mb->in(MemBarNode::Precedent); ++ assert(load_store->is_LoadStore(), "unexpected graph shape"); ++ return is_CAS(load_store->Opcode()); ++ } ++ ++ return false; ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ 
assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (mem->is_Store()) { ++ assert(mem->as_Store()->is_release(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); ++ return use_AMO(mem->Opcode()); ++ } else { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_volatile(const Node *n) ++{ ++ // assert n->is_MemBar(); ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *mbvol = n->as_MemBar(); ++ ++ bool release = false; ++ if (mbvol->trailing_store()) { ++ Node* mem = mbvol->in(MemBarNode::Precedent); ++ release = use_AMO(mem->Opcode()); ++ } ++ ++ assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); ++#ifdef ASSERT ++ if (release) { ++ Node* leading = mbvol->leading_membar(); ++ assert(leading->Opcode() == Op_MemBarRelease, ""); ++ assert(leading->as_MemBar()->leading_store(), ""); ++ assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); ++ } ++#endif ++ ++ return release; ++} ++ ++bool needs_releasing_store(const Node *n) ++{ ++ // assert n->is_Store(); ++ if (UseBarriersForVolatile) { ++ // we use a normal store and dbar combination ++ return false; ++ } ++ ++ StoreNode *st = n->as_Store(); ++ ++ return st->trailing_membar() != NULL; ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. 
++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. 
++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! 
Leave two stack slots for ra and fp
++
++  // Number of outgoing stack slots killed above the out_preserve_stack_slots
++  // for calls to C. Supports the var-args backing area for register parms.
++  varargs_C_out_slots_killed(0);
++
++  // The after-PROLOG location of the return address. Location of
++  // return address specifies a type (REG or STACK) and a number
++  // representing the register number (i.e. - use a register name) or
++  // stack slot.
++  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
++  // Otherwise, it is above the locks and verification slot and alignment word
++  //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong));
++  return_addr(REG RA);
++
++  // Body of function which returns an integer array locating
++  // arguments either in registers or in stack slots. Passed an array
++  // of ideal registers called "sig" and a "length" count. Stack-slot
++  // offsets are based on outgoing arguments, i.e. a CALLER setting up
++  // arguments for a CALLEE. Incoming stack arguments are
++  // automatically biased by the preserve_stack_slots field above.
++
++
++  // will be generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing)
++  // StartNode::calling_convention calls this.
++  calling_convention %{
++    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
++  %}
++
++
++
++
++  // Body of function which returns an integer array locating
++  // arguments either in registers or in stack slots. Passed an array
++  // of ideal registers called "sig" and a "length" count. Stack-slot
++  // offsets are based on outgoing arguments, i.e. a CALLER setting up
++  // arguments for a CALLEE. Incoming stack arguments are
++  // automatically biased by the preserve_stack_slots field above.
++
++
++  // SEE CallRuntimeNode::calling_convention for more information.
++  c_calling_convention %{
++    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
++  %}
++
++
++  // Location of C & interpreter return values
++  // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR.
++  // SEE Matcher::match.
++  c_return_value %{
++    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
++    /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */
++    static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num };
++    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num };
++    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
++  %}
++
++  // Location of return values
++  // register(s) contain(s) return value for Op_StartC2I and Op_Start.
++  // SEE Matcher::match.
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
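
The immediate operands defined next all classify constants by how many signed or unsigned bits they need, which decides whether a value can be encoded directly in an instruction. A small standalone sketch of those range checks (illustrative only; the helper names here are not from the patch):

  #include <cassert>
  #include <cstdint>

  static bool fits_simm(std::int64_t v, unsigned bits) {  // signed immediate
    return v >= -(std::int64_t(1) << (bits - 1)) && v <= (std::int64_t(1) << (bits - 1)) - 1;
  }
  static bool fits_uimm(std::int64_t v, unsigned bits) {  // unsigned immediate
    return v >= 0 && v <= (std::int64_t(1) << bits) - 1;
  }

  int main() {
    assert(fits_simm(2047, 12) && !fits_simm(2048, 12));  // immI12 / immL12 accept [-2048, 2047]
    assert(fits_uimm(4095, 12) && !fits_uimm(4096, 12));  // immI_0_4095 accepts [0, 4095]
    assert(fits_uimm(31, 5)    && !fits_uimm(32, 5));     // immIU5 accepts [0, 31]
    return 0;
  }
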
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes 
++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ 
predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); 
++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ 
constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL2I(mRegL reg) %{ ++ match(ConvL2I reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ 
constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOp. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++operand cmpOpEqNe() %{ ++ match(Bool); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne ++ || n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
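
For reference, every memory-style operand in this file ultimately reduces to the MEMORY_INTER quadruple (base, index, scale, disp); the stack-slot operands below simply fix the base to SP (encoding 0x1d) and put the allocator's stack offset in disp. A standalone sketch of that decomposition (example registers and offsets are illustrative):

  #include <cstdio>

  // One entry per addressing form; values are examples, not generated code.
  struct MemInterface {
    const char* base;   // base register
    const char* index;  // index register ("none" when absent)
    int         scale;  // shift applied to the index
    long        disp;   // byte displacement (a stack offset for stackSlot operands)
  };

  int main() {
    const MemInterface forms[] = {
      { "S1", "none", 0, 0  },   // indirect:    [$reg]
      { "S1", "none", 0, 64 },   // indOffset12: [$reg + $off]
      { "S1", "T2",   0, 0  },   // indIndex:    [$addr + $index]
      { "SP", "none", 0, 16 },   // stackSlotI:  base(0x1d) is SP, disp is the stack offset
    };
    for (const MemInterface& m : forms)
      std::printf("[%s + %s<<%d + %ld]\n", m.base, m.index, m.scale, m.disp);
    return 0;
  }
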
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++opclass mRegIorL2I( mRegI, mRegL2I); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
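
A toy reading of the latency notation used by the pipe_class entries that follow: "dst : WB(write)+N" means the destination is written N cycles after the WB stage of pipe_desc(IF, ID, RD, CA, WB, CM). The sketch below is only an interpretation aid, not ADLC output, and it assumes cycle numbering starts at IF = 0:

  #include <cstdio>

  enum Stage { IF, ID, RD, CA, WB, CM };  // same order as pipe_desc above

  // "op : WB(write)+extra" read as: result written 'extra' cycles after the WB stage.
  static int result_ready_cycle(Stage writes_in, int extra) {
    return static_cast<int>(writes_in) + extra;
  }

  int main() {
    std::printf("ialu_regI_regI (WB+1):  cycle %d\n", result_ready_cycle(WB, 1));
    std::printf("ialu_loadI     (WB+3):  cycle %d\n", result_ready_cycle(WB, 3));
    std::printf("ialu_div       (WB+10): cycle %d\n", result_ready_cycle(WB, 10));
    return 0;
  }
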
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : 
CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
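
The load and store instructs that follow all defer to a MacroAssembler::loadstore_enc() helper defined elsewhere in this patch. The standalone sketch below is not that helper; it only illustrates, under the assumption of a 12-bit signed displacement field, the kind of addressing-mode dispatch such an encoding has to make (the real helper also handles the register-indexed and narrow-oop forms in the memory opclass):

  #include <cstdio>

  static bool fits_simm12(long long v) { return v >= -2048 && v <= 2047; }

  // Pretend emitter: prints the instruction(s) a base+displacement load would need.
  static void sketch_load_int(const char* dst, const char* base, long long disp) {
    if (fits_simm12(disp)) {
      std::printf("ld_w   %s, %s, %lld\n", dst, base, disp);  // displacement folds into the load
    } else {
      std::printf("li     AT, %lld\n", disp);                 // materialize the offset in AT
      std::printf("add_d  AT, %s, AT\n", base);
      std::printf("ld_w   %s, AT, 0\n", dst);
    }
  }

  int main() {
    sketch_load_int("T0", "S1", 64);      // small offset: single ld_w
    sketch_load_int("T0", "S1", 65536);   // large offset: li + add_d + ld_w
    return 0;
  }
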
++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_reg_volatile(indirect mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(205); ++ format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(185); ++ format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP_volatile(indirect mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d R0, $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeN_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%}
++ ++instruct storeP2NKlass_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, 
immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(120); ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); ++ format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem #@storeI" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ 
mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ li(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 
'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
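// The try/catch and rethrow hand-off sketched in the comment block above (the callee publishes
// exception_oop/exception_pc, OptoRuntime::rethrow_stub asks the runtime for the handler that
// belongs to the return pc in the caller, then tail-jumps to it with the exception restored)
// can be pictured with a small stand-alone C++ model. This is a toy sketch with invented names
// (ToyException, lookup_handler, and a free function called rethrow_stub); it is not HotSpot
// code and only mirrors the shape of the control transfer.
#include <cstdio>

struct ToyException { const char* msg; };            // plays the role of exception_oop (T0/V0)
using Handler = void (*)(ToyException*);

// Plays the role of the runtime lookup: map the return pc in the caller to its handler.
static Handler lookup_handler(void* return_pc) {
  (void)return_pc;                                   // a real VM would walk the method's handler table
  return [](ToyException* e) { std::printf("handled: %s\n", e->msg); };
}

// Plays the role of the rethrow stub: find the handler, then transfer control to it.
static void rethrow_stub(ToyException* oop, void* pc) {
  Handler h = lookup_handler(pc);                    // "determine the return address in the caller"
  h(oop);                                            // the tail-jump, modeled here as a direct call
}

int main() {
  ToyException e{"unwind"};
  rethrow_stub(&e, nullptr);                         // the callee exits through its rethrow path
  return 0;
}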
++instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = 
$cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} 
++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) 
{ ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct unnecessary_membar_acquire() %{ ++ predicate(unnecessary_acquire(n)); ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ format %{ "membar_acquire (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire (elided)"); ++ %} ++ ++ ins_pipe(empty); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ predicate(unnecessary_volatile(n)); ++ match(MemBarVolatile); ++ ins_cost(0); ++ ++ format %{ "membar_volatile (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile (elided)"); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional 
Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int 
flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, 
op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, 
TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = 
$dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, 
(MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = 
as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
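++// A rough reading of the cmpL3/cmpF3/cmpD3 lowerings below: two
++// set-on-less-than results are produced and subtracted, so dst ends up as
++// (src1 > src2) - (src1 < src2), i.e. -1, 0 or +1. In the float/double
++// forms the unordered (NaN) case is folded into the "less" side via
++// fcmp_cult, which is why nan_result is listed as -1.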
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
++  match(Set dst (CmpL3 src1 src2));
++  ins_cost(1000);
++  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
++  ins_encode %{
++    Register opr1 = as_Register($src1$$reg);
++    Register opr2 = as_Register($src2$$reg);
++    Register dst = as_Register($dst$$reg);
++
++    __ slt(AT, opr1, opr2);
++    __ slt(dst, opr2, opr1);
++    __ sub_d(dst, dst, AT);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++//
++// less_result = -1
++// greater_result = 1
++// equal_result = 0
++// nan_result = -1
++//
++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
++  match(Set dst (CmpF3 src1 src2));
++  ins_cost(1000);
++  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
++  ins_encode %{
++    FloatRegister src1 = as_FloatRegister($src1$$reg);
++    FloatRegister src2 = as_FloatRegister($src2$$reg);
++    Register dst = as_Register($dst$$reg);
++
++    __ fcmp_clt_s(FCC0, src2, src1);
++    __ fcmp_cult_s(FCC1, src1, src2);
++    __ movcf2gr(dst, FCC0);
++    __ movcf2gr(AT, FCC1);
++    __ sub_d(dst, dst, AT);
++
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
++  match(Set dst (CmpD3 src1 src2));
++  ins_cost(1000);
++  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
++  ins_encode %{
++    FloatRegister src1 = as_FloatRegister($src1$$reg);
++    FloatRegister src2 = as_FloatRegister($src2$$reg);
++    Register dst = as_Register($dst$$reg);
++
++    __ fcmp_clt_d(FCC0, src2, src1);
++    __ fcmp_cult_d(FCC1, src1, src2);
++    __ movcf2gr(dst, FCC0);
++    __ movcf2gr(AT, FCC1);
++    __ sub_d(dst, dst, AT);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{
++  match(Set dummy (ClearArray cnt base));
++  effect(USE_KILL cnt, USE_KILL base);
++  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
++  ins_encode %{
++    //Assume cnt is the number of bytes in an array to be cleared,
++    //and base points to the starting address of the array.
++    Register base = $base$$Register;
++    Register cnt = $cnt$$Register;
++    Label Loop, done;
++
++    __ beq(cnt, R0, done);
++
++    __ bind(Loop);
++    __ st_d(R0, base, 0);
++    __ addi_d(cnt, cnt, -1);
++    __ addi_d(base, base, wordSize);
++    __ bne(cnt, R0, Loop);
++
++    __ bind(done);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{
++  match(Set dummy (ClearArray cnt base));
++  effect(USE_KILL base);
++  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
++  ins_encode %{
++    //Assume cnt is the number of bytes in an array to be cleared,
++    //and base points to the starting address of the array.
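++    // Sketch of the constant-length strategy below: the cnt % 8 leftover
++    // doublewords are cleared with straight-line st_d stores; if anything
++    // remains, AT is set to the end address (base + cnt * 8) and an 8-way
++    // unrolled loop stores 64 zero bytes per iteration until base reaches AT.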
++ Register base = $base$$Register; ++ long cnt = $cnt$$constant; ++ Label Loop, done; ++ ++ int tmp = cnt % 8; ++ int i = 0; ++ for (; i < tmp; i++) { ++ __ st_d(R0, base, i * 8); ++ } ++ if (cnt - tmp) { ++ __ li(AT, cnt); ++ __ alsl_d(AT, AT, base, 2); ++ __ addi_d(base, base, i * 8); ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ st_d(R0, base, 8); ++ __ st_d(R0, base, 16); ++ __ st_d(R0, base, 24); ++ __ st_d(R0, base, 32); ++ __ st_d(R0, base, 40); ++ __ st_d(R0, base, 48); ++ __ st_d(R0, base, 56); ++ __ addi_d(base, base, 64); ++ __ blt(base, AT, Loop); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len); ++ format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} ++ ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_indexofU_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression ++instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String Compress $src,$dst -> $result @ string_compress " %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// byte[] to char[] inflation ++instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, ++ mRegL tmp1, mRegL tmp2) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); ++ ++ format %{ "String Inflate $src,$dst @ string_inflate " %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$Register, $tmp2$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register 
src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_w(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ sub_w(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegLorI2L src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_d(AT, src, 63); ++ __ xorr(dst, src, AT); ++ __ sub_d(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction 
Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. 
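++// Note on the immediate forms above: subI_Reg_immI_M2047_2048 and
++// subL_Reg_immL_M2047_2048 are encoded as addi_w/addi_d with the negated
++// constant; the -2047..2048 operand range presumably keeps the negated value
++// inside addi's signed 12-bit immediate (-2048..2047). The register-register
++// forms use sub_w/sub_d directly.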
++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} 
++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmadds $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * 
src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++instruct copySignF_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (CopySignF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_s($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ ++ match(Set dst (CopySignD src1 (Binary src2 zero))); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_d($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI 
dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 6, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L 
src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 5-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slli_w(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
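++// Illustrative Java-level sketch of the idioms these two rules collapse:
++//   int s = (x << 16) >> 16;   // i2s, matched above and emitted as a single ext_w_h
++//   int b = (x << 24) >> 24;   // i2b, matched below and emitted as a single ext_w_b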
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg 
= as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ 
ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 
rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = 
as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. 
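++// Illustrative Java-level sketch of what the float-to-integer conversions below cover:
++//   int  i = (int)  f;   // ConvF2I: ftintrz_w_s truncates toward zero into $tmp, movfr2gr_s copies it to $dst
++//   long l = (long) f;   // ConvF2L: same shape with ftintrz_l_s / movfr2gr_d
++// The FP temp is required because ftintrz writes a float register, not a GPR.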
++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ 
ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. 
++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions for allocation. 
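++// The encoding below folds the full memory operand (base + index << scale + disp) into AT when
++// needed: alsl_d handles a scaled index, and a displacement outside the simm12 range is
++// materialized with li/add before preld is issued with hint 8 for the allocation prefetch.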
++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != 0) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ 
++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ 
"ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ if (is_reachable()) { ++ // Here we should emit illtrap! 
++ __ stop("ShouldNotReachHere"); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
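++// Rough sketch of how the LL/SC pair is used (loadPLocked supplies the ll_d, the matching sc_d
++// comes from storePConditional above when C2 updates the shared heap top):
++//   old_top = ll_d(heap_top)            // loadPLocked
++//   sc_d(heap_top, new_top) -> flag     // storePConditional, flag copied from AT into $cr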
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ ++ match(Set prev (GetAndSetI mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ 
Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_w(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_w(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ ++ match(Set prev (GetAndSetL mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ ++ match(Set prev (GetAndSetN mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ __ amswap_db_w(AT, newv, addr); ++ __ bstrpick_d(prev, AT, 31, 0); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ ++ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_d $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_d(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_d(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_d [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ ++ match(Set newval (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_w $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_w(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_w(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_w [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and 
Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. 
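++// Illustrative Java-level source for this match (covered below in either AndL operand order):
++//   long u = intField & 0xFFFFFFFFL;   // intField is an int loaded from memory; LoadI + ConvI2L + AndL fold into one ld_wu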
++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ li(T4, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, T4, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "ld_w AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ ld_w(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ ++ format %{ "RevB_I $dst, $src" %} ++ ins_encode %{ ++ __ revb_2w($dst$$Register, $src$$Register); ++ __ slli_w($dst$$Register, $dst$$Register, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_long(mRegL dst, mRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ ++ format %{ "RevB_L $dst, $src" %} ++ ins_encode %{ ++ __ revb_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ format %{ "RevB_US $dst, $src" %} ++ ins_encode %{ ++ __ revb_2h($dst$$Register, $src$$Register); ++ __ bstrpick_d($dst$$Register, $dst$$Register, 15, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ format %{ "RevB_S $dst, $src" %} ++ ins_encode %{ ++ __ 
revb_2h($dst$$Register, $src$$Register); ++ __ ext_w_h($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// --------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX 
src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, $src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.w $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, $src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} ++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, 
$imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} ++ ins_encode %{ ++ __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} ++ ins_encode %{ ++ __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (MulVB src1 src2)); ++ 
format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} ++ ins_encode %{ ++ __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "xvmul.d $dst, $src1, $src2\t# @mul4L" %} ++ ins_encode %{ ++ __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS -------------------------------------- ++ ++instruct abs16B(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs16B" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8S(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs8S" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4I(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs4I" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2L(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs2L" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVF src)); ++ format %{ "vbitclri.w $dst, $src\t# @abs4F" %} ++ ins_encode %{ ++ __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVD src)); ++ format %{ "vbitclri.d $dst, $src\t# @abs2D" %} ++ ins_encode %{ ++ __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs32B(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs32B" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs16S(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs16S" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8I(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs8I" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4L(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs4L" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
abs8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVF src)); ++ format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} ++ ins_encode %{ ++ __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVD src)); ++ format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} ++ ins_encode %{ ++ __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS DIFF --------------------------------- ++ ++instruct absd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} ++ ins_encode %{ ++ __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "vabsd.d $dst, $src1, $src2\t# @absd2L" %} ++ ins_encode %{ ++ __ vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} ++ ins_encode %{ ++ __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} ++ ins_encode %{ ++ __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MAX -------------------------------------- ++ ++instruct max16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} ++ ins_encode %{ ++ __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} ++ ins_encode %{ ++ __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} ++ ins_encode %{ ++ __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} ++ ins_encode %{ ++ __ vmax_d($dst$$FloatRegister, 
$src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} ++ ins_encode %{ ++ __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} ++ ins_encode %{ ++ __ vfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} ++ ins_encode %{ ++ __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} ++ ins_encode %{ ++ __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} ++ ins_encode %{ ++ __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} ++ ins_encode %{ ++ __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP 
tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} ++ ins_encode %{ ++ __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} ++ ins_encode %{ ++ __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MIN -------------------------------------- ++ ++instruct min16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} ++ ins_encode %{ ++ __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} ++ ins_encode %{ ++ __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} ++ ins_encode %{ ++ __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} ++ ins_encode %{ ++ __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} ++ ins_encode %{ ++ __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ 
vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} ++ ins_encode %{ ++ __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} ++ ins_encode %{ ++ __ xvmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.h $dst, $src1, $src2\t# @min16S" %} ++ ins_encode %{ ++ __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} ++ ins_encode %{ ++ __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} ++ ins_encode %{ ++ __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} ++ ins_encode %{ ++ __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4D(vecY dst, vecY src1, 
vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} ++ ins_encode %{ ++ __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NEG -------------------------------------- ++ ++instruct neg4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVF src)); ++ format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} ++ ins_encode %{ ++ __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (NegVD src)); ++ format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} ++ ins_encode %{ ++ __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (NegVF src)); ++ format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} ++ ins_encode %{ ++ __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVD src)); ++ format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} ++ ins_encode %{ ++ __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SQRT ------------------------------------- ++ ++instruct sqrt4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVF src)); ++ format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} ++ ins_encode %{ ++ __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SqrtVD src)); ++ format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} ++ ins_encode %{ ++ __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SqrtVF src)); ++ format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} ++ ins_encode %{ ++ __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVD src)); ++ format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} ++ ins_encode %{ ++ __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MADD ------------------------------------- ++ ++instruct madd16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} ++ 
ins_encode %{ ++ __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} ++ ins_encode %{ ++ __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} ++ ins_encode %{ ++ __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} ++ ins_encode %{ ++ __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); ++ format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} ++ ins_encode %{ ++ __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} ++ ins_encode %{ ++ __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} ++ ins_encode %{ ++ __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} ++ ins_encode %{ ++ __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} ++ ins_encode %{ ++ __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} ++ ins_encode %{ ++ __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary src1 
src2))); ++ format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} ++ ins_encode %{ ++ __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} ++ ins_encode %{ ++ __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MSUB ------------------------------------- ++ ++instruct msub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} ++ ins_encode %{ ++ __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} ++ ins_encode %{ ++ __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} ++ ins_encode %{ ++ __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} ++ ins_encode %{ ++ __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} ++ ins_encode %{ ++ __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} ++ ins_encode %{ ++ __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} ++ ins_encode %{ ++ __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} ++ ins_encode %{ ++ __ 
xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} ++ ins_encode %{ ++ __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} ++ ins_encode %{ ++ __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} ++ ins_encode %{ ++ __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} ++ ins_encode %{ ++ __ xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMADD ----------------------------------- ++ ++// -src1 * src2 - src3 ++instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} ++ ins_encode %{ ++ __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} ++ ins_encode %{ ++ __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} ++ ins_encode %{ ++ __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ 
"xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} ++ ins_encode %{ ++ __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMSUB ----------------------------------- ++ ++// -src1 * src2 + src3 ++instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} ++ ins_encode %{ ++ __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} ++ ins_encode %{ ++ __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub8F" %} ++ ins_encode %{ ++ __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} ++ ins_encode %{ ++ __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ 
vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ 
format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ 
__ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) 
%{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( 
pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV (OrV src1 
src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Reduction Add -------------------------------- ++ ++instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Mul -------------------------------- ++ ++instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} ++ ins_encode %{ 
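++    // Note: reduce() is assumed to be the shared helper declared for this port in
++    // macroAssembler_loongarch; from its uses in these rules it appears to fold the
++    // lanes of $vsrc with this node's ideal opcode (MulReductionVI here), combine the
++    // result with the scalar input $src, and use $tmp1/$tmp2 as vector scratch. The
++    // trailing 16/32 argument looks like the vector width in bytes (vecX vs. vecY).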
++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, 
$src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Max -------------------------------- ++ ++instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Min -------------------------------- ++ ++instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} ++ 
ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ RoundDoubleModeV ---------------------------- ++ ++instruct round2D(vecX dst, vecX src, immI rmode) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct round4D(vecY dst, vecY src, immI rmode) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- PopCount -------------------------------------- ++ ++instruct popcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popcount8I(vecY dst, vecY src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); ++ match(Set dst (PopCountVI src)); ++ format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
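++// (For example, -XX:OptoPeepholeAt=2 would presumably enable only the peephole rule
++// numbered 2 and disable every other rule.)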
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad +--- a/src/hotspot/cpu/loongarch/loongarch.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/loongarch.ad 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,4567 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*)branch; ++ jint *pc = (jint *)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm.
++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. 
++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to <destination> or <trampoline stub> ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++
bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ bne(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ beq(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ bceqz(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ bcnez(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, SP, AT); ++ blt(AT, R0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. 
++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ li(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. 
++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ addi_d(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ addi_d(SP, SP, 8); ++ b(E); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ bind(E); ++} ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh){ ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ 
patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ li(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); 
assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ li(AT, target(before_call)); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ li(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void 
MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize - 2 * wordSize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ move(FP, SP); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T4; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ li(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ 
st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(AT, tmp); ++ juint sps = os::get_serialize_page_shift_count(); ++ juint lsb = sps + 2; ++ juint msb = sps + log2_uint(os::vm_page_size()) - 1; ++ bstrpick_w(AT, thread, msb, lsb); ++ li(tmp, os::get_memory_serialize_page()); ++ alsl_d(tmp, AT, tmp, Address::times_2 - 1); ++ st_w(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ ld_w(AT, AT, 0); ++ addi_d(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, 
split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ move(A1, reg); ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ 
st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. ++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 6 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++// used registers : SCR1, SCR2 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(SCR2, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(SCR1, SCR2, 0); ++ addi_d(SCR1, SCR1, 1); ++ st_w(SCR1, SCR2, 0); ++ ++ // make sure object is 'reasonable' ++ beqz(A1, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(SCR1, oop_mask); ++ andr(SCR2, A1, SCR1); ++ li(SCR1, oop_bits); ++ bne(SCR2, SCR1, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ load_klass(SCR2, A1); ++ beqz(SCR2, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ srai_w(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ srli_d(AT, reg, 8); ++ slli_d(reg, reg, 24); ++ srli_d(reg, reg, 16); ++ orr(reg, reg, AT); ++ bstrpick_d(reg, reg, 15, 0); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srli_w(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ 
//reg : 4 1 2 1 ++ slli_w(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. 
Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP, ++// since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be to write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native code would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp is KILLED. ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with rather awkward and brittle register assignments below. ++// Furthermore, the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// Avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * Use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit.
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ ++ // Recursive locking ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. 
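++      // Illustrative note, not part of the upstream change: the instructions below are
++      // the usual HotSpot stack-lock recursion test. In plain C++ the predicate is roughly
++      //   bool recursive = (((uintptr_t)mark - (uintptr_t)sp)
++      //                     & (uintptr_t)(7 - os::vm_page_size())) == 0;
++      // where 'mark' stands for the value just reloaded by the failed CAS (a pointer to a
++      // BasicLock when the object is stack-locked). A zero result means that pointer is
++      // 8-byte aligned and lies within one page above SP, i.e. it is this thread's own
++      // stack lock; the zero is stored into the box and later turned into success.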
++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. 
The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld_d(tmpReg, Address(boxReg, 0)) ; ++ assert_different_registers(AT, tmpReg); ++ li(AT, 0x1); ++ beq(tmpReg, R0, DONE_LABEL) ; ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
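++      // Illustrative sketch, an assumption rather than part of the patch: the tests emitted
++      // below are the assembler form of this C-level 1-0 exit on the inflated ObjectMonitor m:
++      //   if (m->_owner == self && m->_recursions == 0 &&
++      //       m->_cxq == NULL && m->_EntryList == NULL) {
++      //     // release barrier (LoadStore|StoreStore), then a plain store
++      //     m->_owner = NULL;   // 1-0 exit, no CAS required
++      //   } else {
++      //     // force control through the slow path
++      //   }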
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++ ++ //TODO: LA ++//In LA, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ 
int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++void MacroAssembler::push(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i ++) ++ st_d(as_Register(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i ++) ++ ld_d(as_Register(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void 
MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, 
"broken oop in encode_heap_oop_not_null2"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(r, r, 31, 0); ++ return; ++ } ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ sub_d(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(dst, src, 31, 0); ++ return; ++ } ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ sub_d(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ lu32i_d(r, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ add_d(r, r, AT); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(r, r, AT, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ move(dst, src); ++ lu32i_d(dst, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ add_d(dst, dst, src); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); 
++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ slli_d(AT, super_check_offset.register_or_noreg(), Address::times_1); ++ add_d(AT, sub_klass, AT); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()*Address::times_1); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
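++  // Illustrative note, not from the patch: the load emitted above is the primary-supers
++  // "display" probe; in C++ terms the fast path tests
++  //   *(Klass**)((address)sub_klass + super_check_offset) == super_klass
++  // where super_check_offset either indexes the fixed-depth display or equals
++  // secondary_super_cache_offset(), in which case only the slow path can prove the
++  // relation; the branches that follow just pick the right continuation.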
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
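++  // Illustrative note, not from the patch: the address returned below is, in C++ terms,
++  //   SP + Interpreter::expr_offset_in_bytes(extra_slot_offset)
++  //      + arg_slot * Interpreter::stackElementSize
++  // with the slot term folded into the base register (via alsl_d) when arg_slot is
++  // not a compile-time constant.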
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
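++    // Illustrative note, not from the patch: after this adjustment recv_klass points at
++    // the itableMethodEntry for the requested index, i.e.
++    //   recv_klass += itable_index * itableMethodEntry::size() * wordSize
++    //                 + itableMethodEntry::method_offset_in_bytes();
++    // so once the matching itableOffsetEntry is found, the method is fetched with a
++    // single indexed load (ldx_d) using that entry's offset.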
++ if (itable_index.is_constant()) { ++ li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); ++ add_d(recv_klass, recv_klass, AT); ++ } else { ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ } ++ ++ Label search, found_method; ++ ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ if (return_method) { ++ // Got a hit. ++ ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); ++ } ++ ++ ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srli_w(cnt1, cnt1, 1); ++ if (!str2_isL) srli_w(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ bge(cnt2, cnt1, Loop); ++ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ if (str1_isL) { ++ ld_bu(AT, str1, 0); ++ } else { ++ ld_hu(AT, str1, 0); ++ } ++ beq(cnt1, R0, done); ++ ++ // compare current character ++ if (str2_isL) { ++ ld_bu(cnt2, str2, 0); ++ } else { ++ ld_hu(cnt2, str2, 0); ++ } ++ addi_d(str1, str1, str1_isL ? 1 : 2); ++ bne(AT, cnt2, haveResult); ++ addi_d(str2, str2, str2_isL ? 1 : 2); ++ addi_d(cnt1, cnt1, -1); ++ b(Loop); ++ ++ bind(haveResult); ++ sub_d(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char) { ++ Label Loop, LoopEnd, True, False; ++ ++ addi_d(result, R0, 1); ++ beq(str1, str2, True); // same char[] ? ++ beqz(cnt, True); ++ ++ addi_d(AT, R0, is_char ? 
wordSize/2 : wordSize); ++ bind(Loop); ++ blt(cnt, AT, LoopEnd); ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt, cnt, is_char ? -wordSize/2 : -wordSize); ++ b(Loop); ++ ++ bind(LoopEnd); ++ beqz(cnt, True); ++ // compare current character ++ if (is_char) { ++ ld_hu(tmp1, str1, 0); ++ ld_hu(tmp2, str2, 0); ++ } else { ++ ld_bu(tmp1, str1, 0); ++ ld_bu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, is_char ? 2 : 1); ++ addi_d(str2, str2, is_char ? 2 : 1); ++ addi_d(cnt, cnt, -1); ++ b(LoopEnd); ++ ++ bind(False); ++ addi_d(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ Label Loop, End, Nega, Done; ++ ++ orr(result, R0, R0); ++ bge(R0, len, Done); ++ ++ li(AT, 0x8080808080808080); ++ ++ addi_d(len, len, -8); ++ blt(len, R0, End); ++ ++ bind(Loop); ++ ld_d(result, ary1, 0); ++ andr(result, result, AT); ++ bnez(result, Nega); ++ beqz(len, Done); ++ addi_d(len, len, -8); ++ addi_d(ary1, ary1, 8); ++ bge(len, R0, Loop); ++ ++ bind(End); ++ ld_d(result, ary1, 0); ++ slli_d(len, len, 3); ++ sub_d(len, R0, len); ++ sll_d(result, result, len); ++ andr(result, result, AT); ++ beqz(result, Done); ++ ++ bind(Nega); ++ ori(result, R0, 1); ++ ++ bind(Done); ++} ++ ++// Compress char[] to byte[]. len must be positive int. ++// jtreg: TestStringIntrinsicRangeChecks.java ++void MacroAssembler::char_array_compress(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) { ++ Label Loop, Done, Once, Fail; ++ ++ move(result, len); ++ bge(R0, result, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ li(tmp3, 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Fail); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ st_w(tmp1, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 4); ++ addi_d(src, src, 8); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_d(AT, src, 0); ++ ++ bstrpick_d(tmp1, AT, 15, 0); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 31, 16); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 1); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 47, 32); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 2); ++ b(Done); ++ ++ bind(Fail); ++ move(result, R0); ++ ++ bind(Done); ++} ++ ++// Inflate byte[] to char[]. len must be positive int. 
++// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java ++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2) { ++ Label Loop, Once, Done; ++ ++ bge(R0, len, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_wu(tmp1, src, 0); ++ ++ // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 23, 16); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 39, 32); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 55, 48); ++ ++ st_d(tmp2, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 8); ++ addi_d(src, src, 4); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_wu(tmp1, src, 0); ++ ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ st_h(tmp2, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 15, 8); ++ st_h(tmp2, dst, 2); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 23, 16); ++ st_h(tmp2, dst, 4); ++ ++ bind(Done); ++} ++ ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 4); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // UTF-16 char occupies 16 bits ++ // ch -> chchchch ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0001000100010001); ++ li(tmp3, 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 4); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 4); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 4); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 15, 0); ++ ++ bind(DO1_LOOP); ++ ld_hu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 2); ++ addi_d(result, result, 1); ++ blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
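++  // Illustrative note, not from the patch: JNI handles carry a tag in their low bits,
++  // and resolve_jobject is, in C++ terms (with 'handle' standing for the value register),
++  //   if (handle & JNIHandles::weak_tag_mask) {                  // jweak
++  //     value = *(oop*)(handle - JNIHandles::weak_tag_value);    // phantom-ref load
++  //   } else {                                                   // ordinary jobject
++  //     value = *(oop*)handle;
++  //   }
++  // with both loads routed through the GC barrier via access_load_at().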
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case LT: 
++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ 
fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ 
case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_b(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_SHORT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; ++ case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; ++ case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_FLOAT: ++ switch (opcode) { ++ case Op_AddReductionVF: fadd_s(reg1, reg2, reg3); break; ++ case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_DOUBLE: ++ switch (opcode) { ++ case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ 
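For reference, here is a minimal scalar model of what the integer reductions generated by reduce() below compute: the vector lanes are folded with the reduction operator first, and the scalar input src is combined last, exactly as the trailing reduce_ins_r()/masknez/maskeqz sequence does. This is an editorial sketch with hypothetical names (add_reduction_vi, max_reduction_v, a plain array standing in for the LSX/LASX lanes), not code from this port; sub-int element types would additionally be sign-extended at the end.

#include <cstdint>

// Scalar semantics of Op_AddReductionVI over T_INT lanes (sketch only).
static int32_t add_reduction_vi(int32_t src, const int32_t* lanes, int lane_count) {
  int32_t acc = lanes[0];
  for (int i = 1; i < lane_count; i++) {
    acc += lanes[i];               // the per-lane folds done by reduce_ins_v()
  }
  return acc + src;                // final combine with the scalar input (reduce_ins_r)
}

// Scalar semantics of Op_MaxReductionV over T_INT lanes (sketch only).
static int32_t max_reduction_v(int32_t src, const int32_t* lanes, int lane_count) {
  int32_t acc = lanes[0];
  for (int i = 1; i < lane_count; i++) {
    if (lanes[i] > acc) acc = lanes[i];   // the vmax_* folds
  }
  return (acc > src) ? acc : src;         // the slt/masknez/maskeqz combine in reduce()
}

The generated code performs the same fold in log2(lane count) steps, using xvpermi_d/vpermi_w/vshuf4i shuffles to pair lanes instead of a linear loop.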
++void MacroAssembler::reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ xvpermi_d(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ vpermi_w(tmp2, tmp1, 0b00001110); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } else if (vector_size == 16) { ++ vpermi_w(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (type != T_LONG) { ++ vshuf4i_w(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_INT) { ++ vshuf4i_h(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_SHORT) { ++ vshuf4i_b(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } ++ } ++ } ++ ++ switch (type) { ++ case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; ++ case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; ++ case T_INT: vpickve2gr_w(dst, tmp1, 0); break; ++ case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (opcode == Op_MaxReductionV) { ++ slt(AT, dst, src); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else if (opcode == Op_MinReductionV) { ++ slt(AT, src, dst); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else { ++ reduce_ins_r(dst, dst, src, type, opcode); ++ } ++ switch (type) { ++ case T_BYTE: ext_w_b(dst, dst); break; ++ case T_SHORT: ext_w_h(dst, dst); break; ++ default: ++ break; ++ } ++} ++ ++void MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_w(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 4); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 5); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 6); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 7); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_d(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000001); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000010); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000011); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00001110); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++#endif // COMPILER2 ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the 
crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. ++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, 
CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { ++ ++ switch(flag) { ++ case 0x01: //equal ++ beq(op1, op2, L); ++ break; ++ case 0x02: //not_equal ++ bne(op1, op2, L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt(op2, op1, L); ++ else ++ bltu(op2, op1, L); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge(op1, op2, L); ++ else ++ bgeu(op1, op2, L); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt(op1, op2, L); ++ else ++ bltu(op1, op2, L); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge(op2, op1, L); ++ else ++ bgeu(op2, op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { ++ switch(flag) { ++ case 0x01: //equal ++ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt_long(op2, op1, *L, true /* signed */); ++ else ++ blt_long(op2, op1, *L, false); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge_long(op1, op2, *L, true /* signed */); ++ else ++ bge_long(op1, op2, *L, false); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt_long(op1, op2, *L, true /* signed */); ++ else ++ blt_long(op1, op2, *L, false); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge_long(op2, op1, *L, true /* signed */); ++ else ++ bge_long(op2, op1, *L, false); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { ++ switch(flag) { ++ case 0x01: //equal ++ beqz(op1, L); ++ break; ++ case 0x02: //not_equal ++ bnez(op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::membar(Membar_mask_bits hint){ ++ address prev = pc() - NativeInstruction::sync_instruction_size; ++ address last = code()->last_insn(); ++ if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { ++ code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. 
LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); ++ block_comment("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ Assembler::membar(hint); ++ } ++} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,825 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. 
++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ static void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, 
++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(A0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. 
Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); ++ void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); ++ void cmp_branchEqNe_off21(int flag, Register op1, Label& L); ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ void increment(Address addr, int imm = 1); ++ void decrement(Address addr, int imm = 1); ++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); } ++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); } ++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); } ++ // Helper functions for statistics gathering. 
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ void 
cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ ++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} ++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} ++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } ++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } ++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop () { addi_d(SP, SP, 8); } ++ void pop2 () { addi_d(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ void push(RegSet regs) { if (regs.bits()) push(regs.bits()); } ++ void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); } ++ ++ void li(Register rd, jlong value); ++ void li(Register rd, address addr) { li(rd, (long)addr); } ++ void patchable_li52(Register rd, jlong value); ++ void lipc(Register rd, Label& L); ++ ++ void move(Register rd, Register rs) { orr(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ // Load the base of the cardtable byte map into reg. ++ void load_byte_map_base(Register reg); ++ ++ // Code for java.lang.StringCoding::hasNegatives() instrinsic. ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ // Code for java.lang.StringUTF16::compress intrinsic. ++ void char_array_compress(Register src, Register dst, Register len, ++ Register result, Register tmp1, ++ Register tmp2, Register tmp3); ++ ++ // Code for java.lang.StringLatin1::inflate intrinsic. ++ void byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2); ++ ++ // Find index of char in UTF-16 string ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. 
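For context, the arrays_equals() declared just below boils down to an element-wise comparison that yields a boolean in the result register. A scalar editorial sketch (hypothetical name arrays_equals_model; byte flavour shown, the char flavour compares 16-bit units):

#include <cstddef>
#include <cstdint>

// Result contract of the array-equality helper (sketch only).
static bool arrays_equals_model(const uint8_t* a, const uint8_t* b, size_t cnt) {
  for (size_t i = 0; i < cnt; i++) {
    if (a[i] != b[i]) {
      return false;   // any mismatching element makes the arrays unequal
    }
  }
  return true;        // all cnt elements matched
}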
++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea(Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++ void membar(Membar_mask_bits hint); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ code()->clear_last_insn(); ++ } ++ ++ // Code for java.math.BigInteger::mulAdd intrinsic. 
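As a reference for the mul_add() declared just below, this is an editorial C++ model of the java.math.BigInteger::mulAdd loop it implements (hypothetical name mul_add_model; plain uint32_t arrays stand in for the Java int[] operands, and offset is taken as already equal to out.length - offset, matching the register convention documented at the definition):

#include <cstdint>

// Multiply-accumulate of 32-bit words with a 64-bit carry, highest index first (sketch only).
static uint32_t mul_add_model(uint32_t* out, const uint32_t* in,
                              int offset, int len, uint32_t k) {
  uint64_t carry = 0;
  int o = offset - 1;
  for (int j = len - 1; j >= 0; j--) {
    uint64_t product = (uint64_t)in[j] * k + out[o] + carry;
    out[o--] = (uint32_t)product;   // store the low 32 bits
    carry = product >> 32;          // carry the high 32 bits into the next word
  }
  return (uint32_t)carry;
}

The assembly version unrolls this loop sixteen words at a time and keeps the running carry in the out register between iterations.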
++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ ++#undef VIRTUAL ++ ++public: ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++#ifdef COMPILER2 ++ void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); ++ void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++#endif ++ ++private: ++ void push(unsigned int bitset); ++ void pop(unsigned int bitset); ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++ ++#ifdef COMPILER2 ++ void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); ++ void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); ++ void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output 
that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,1625 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. 
These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. 
See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... 
/* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 
2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. 
++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. 
++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i<jz;i++) { /* compute 1-q */ ++// j = iq[i]; ++// if(carry==0) { ++// if(j!=0) { ++// carry = 1; iq[i] = 0x1000000- j; ++// } ++// } else iq[i] = 0xffffff - j; ++// } ++// if(q0>0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)?
fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? 
zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ 
vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ 
sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. 
j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. 
for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in FA0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { ++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, ++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; ++ li(SCR2, dsin_coef); ++ fld_d(s5, SCR2, 32); ++ fld_d(s6, SCR2, 40); ++ fmul_d(z, x, x); // z = x*x; ++ fld_d(s1, SCR2, 0); ++ fld_d(s2, SCR2, 8); ++ fld_d(s3, SCR2, 16); ++ fld_d(s4, SCR2, 24); ++ fmul_d(v, z, x); // v = z*x; ++ ++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { ++ fmadd_d(r, z, s6, s5); ++ // initialize "half" in current block to utilize 2nd FPU. However, it's ++ // not a part of this block ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, s4); ++ fmadd_d(r, z, r, s3); ++ fmadd_d(r, z, r, s2); ++ } ++ ++ if (!iyIsOne) { ++ // return x+v*(S1+z*r); ++ fmadd_d(s1, z, r, s1); ++ fmadd_d(FA0, v, s1, x); ++ } else { ++ // return x-((z*(half*y-v*r)-y)-v*S1); ++ fmul_d(s6, half, y); // half*y ++ fnmsub_d(s6, v, r, s6); // half*y-v*r ++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) ++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) ++ fadd_d(FA0, x, s6); ++ } ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. 
cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. 
Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. 
Final switch use equivalent bit checks(tbz/tbnz) ++// Input ans output: ++// 1. Input for generated function: X = A0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address if pio2 table, dsin_coef = address if dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in FA0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, ++ address dsin_coef, address dcos_coef) { ++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; ++ Register X = A0, absX = A1, n = A2, ix = A3; ++ FloatRegister y0 = FA4, y1 = FA5; ++ ++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { ++ movfr2gr_d(X, FA0); ++ li(SCR2, 0x3e400000); ++ li(SCR1, 0x3fe921fb); // high word of pi/4. ++ bstrpick_d(absX, X, 62, 0); // absX ++ li(T0, 0x7ff0000000000000); ++ srli_d(ix, absX, 32); // set ix ++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) ++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return ++ blt(absX, T0, ARG_REDUCTION); ++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). ++ // Set last bit unconditionally to make it NaN ++ ori(T0, T0, 1); ++ movgr2fr_d(FA0, T0); ++ jr(RA); ++ } ++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { ++ bind(TINY_X); ++ if (isCos) { ++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000) ++ } ++ jr(RA); ++ } ++ bind(ARG_REDUCTION); /* argument reduction needed */ ++ block_comment("n = __ieee754_rem_pio2(x,y);"); { ++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); ++ } ++ block_comment("switch(n&3) {case ... }"); { ++ if (isCos) { ++ srli_w(T0, n, 1); ++ xorr(absX, n, T0); ++ andi(T0, n, 1); ++ bnez(T0, RETURN_SIN); ++ } else { ++ andi(T0, n, 1); ++ beqz(T0, RETURN_SIN); ++ } ++ generate_kernel_cos(y0, dcos_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ bind(RETURN_SIN); ++ generate_kernel_sin(y0, true, dsin_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ } ++ bind(EARLY_CASE); ++ vxor_v(y1, y1, y1); ++ if (isCos) { ++ generate_kernel_cos(FA0, dcos_coef); ++ } else { ++ generate_kernel_sin(FA0, false, dsin_coef); ++ } ++ bind(DONE); ++ jr(RA); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,564 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && 
JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. 
++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm12! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
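As a rough illustration only (not part of the patch, and assuming Klass::method_at_vtable behaves as in mainline HotSpot), the dispatch that lookup_virtual_method encodes into the stub corresponds to this VM-level lookup:

// Illustration only -- hypothetical helper, requires HotSpot's Klass/Method types.
// vtable_index is the value just loaded from MemberName.vmindex.
static Method* link_to_virtual_target(Klass* recv_klass, int vtable_index) {
  assert(vtable_index >= 0, "no virtual index");     // mirrors the VerifyMethodHandles check
  return recv_klass->method_at_vtable(vtable_index); // what the emitted vtable loads compute
}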
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). 
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,511 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
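A minimal sketch of the reachability decision used below (illustration only; it assumes MacroAssembler::reachable_from_branch_short reduces to a simple range check on the signed 26-bit, 4-byte-scaled bl offset, i.e. roughly +-128 MB):

#include <cstdint>

typedef unsigned char* address;   // as in HotSpot's globalDefinitions

// Illustration only: can a bl at branch_pc reach dest directly?
// 26-bit signed immediate in 4-byte units => +-2^27 bytes.
static bool bl_can_reach(address branch_pc, address dest) {
  intptr_t offs = dest - branch_pc;
  return offs >= -(intptr_t(1) << 27) && offs < (intptr_t(1) << 27);
}
// When this is false, set_destination_mt_safe leaves the bl pointing at the
// trampoline stub and only rewrites the stub's 64-bit destination word.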
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if 
(is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ address ret = (address)-1; ++ ++ // short ++ if (is_short()) { ++ ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ return ret == instruction_address() ? (address)-1 : ret; ++ } ++ ++ // far ++ if (is_far()) { ++ ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ return ret == instruction_address() ? 
(address)-1 : ret; ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x40 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,528 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf, ++ sync_instruction_size = 4 ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ address trampoline_jump(CodeBuffer &cbuf, address dest); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. ++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). 
++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. 
++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. 
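For the far form sketched above, the target is rebuilt exactly as NativeJump::jump_destination() and NativeFarCall::destination() do it; a minimal illustration (helper name hypothetical, immediates assumed already sign-extended by the decoder, the 16-bit jirl field scaled by 4):

#include <cstdint>

typedef unsigned char* address;   // as in HotSpot's globalDefinitions

// Illustration only: target of "pcaddu18i reg, si20; jirl rd, reg, si16".
static address far_branch_target(address pc, intptr_t si20, intptr_t si16) {
  return pc + (si20 << 18) + (si16 << 2);
}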
++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/register_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? names[encoding()] : "fccnoreg"; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/register_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,495 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 
((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 ((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define A0 R4 ++#define A1 R5 ++#define A2 R6 ++#define A3 R7 ++#define A4 R8 ++#define A5 R9 ++#define A6 R10 ++#define A7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 A0 ++#define V1 A1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temporary callee-saved register; we use it to save registers that may be clobbered across call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return 
as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( 
f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of condition flag registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( 
fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) ++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // its optoregs. ++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++template <class RegImpl> ++class AbstractRegSet { ++ uint32_t _bitset; ++ ++ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ AbstractRegSet() : _bitset(0) { } ++ ++ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } ++ ++ AbstractRegSet operator+(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet operator-(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet &operator+=(const AbstractRegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ AbstractRegSet &operator-=(const AbstractRegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static AbstractRegSet of(RegImpl r1) { ++ return AbstractRegSet(r1); ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2) { ++ return of(r1) + r2; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) { ++ return of(r1, r2, r3, r4) + r5; ++ } ++ ++ static AbstractRegSet range(RegImpl start, RegImpl end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return AbstractRegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++typedef AbstractRegSet<Register> RegSet; ++ ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implementation for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in a "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (type() == relocInfo::internal_word_type || ++ type() == relocInfo::section_word_type) { ++ MacroAssembler::pd_patch_instruction(addr(), x); ++ } else if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,3621 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return 
fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, gpr_offset(fp_off)); ++ ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. 
++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 8 bytes registers are saved by default using fld/fst instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 8; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ li(T4, (long)destination); ++ __ jr(T4); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. 
Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ li(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! 
I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. 
++  __ move(T5, Rmethod);
++  __ jr(T4);
++}
++
++// ---------------------------------------------------------------
++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
++                                                            int total_args_passed,
++                                                            int comp_args_on_stack,
++                                                            const BasicType *sig_bt,
++                                                            const VMRegPair *regs,
++                                                            AdapterFingerPrint* fingerprint) {
++  address i2c_entry = __ pc();
++
++  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
++
++  // -------------------------------------------------------------------------
++  // Generate a C2I adapter.  On entry we know Rmethod holds the method.  The
++  // args start out packed in the compiled layout.  They need to be unpacked
++  // into the interpreter layout.  This will almost always require some stack
++  // space.  We grow the current (compiled) stack, then repack the args.  We
++  // finally end in a jump to the generic interpreter entry point.  On exit
++  // from the interpreter, the interpreter will restore our SP (lest the
++  // compiled code, which relies solely on SP and not FP, get sick).
++
++  address c2i_unverified_entry = __ pc();
++  Label skip_fixup;
++  {
++    Register holder = T1;
++    Register receiver = T0;
++    Register temp = T8;
++    address ic_miss = SharedRuntime::get_ic_miss_stub();
++
++    Label missed;
++
++    // add for compressed oops
++    __ load_klass(temp, receiver);
++
++    __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
++    __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset());
++    __ bne(AT, temp, missed);
++    // Method might have been compiled since the call site was patched to
++    // interpreted; if that is the case, treat it as a miss so we can get
++    // the call site corrected.
++    __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
++    __ beq(AT, R0, skip_fixup);
++    __ bind(missed);
++
++    __ jmp(ic_miss, relocInfo::runtime_call_type);
++  }
++  address c2i_entry = __ pc();
++
++  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
++
++  __ flush();
++  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
++}
++
++int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
++                                        VMRegPair *regs,
++                                        VMRegPair *regs2,
++                                        int total_args_passed) {
++  assert(regs2 == NULL, "not needed on LA");
++  // Return the number of VMReg stack_slots needed for the args.
++  // This value does not include an abi space (like register window
++  // save area).
++
++  // We return the amount of VMRegImpl stack slots we need to reserve for all
++  // the arguments NOT counting out_preserve_stack_slots.  Since we always
++  // have space for storing at least 6 registers to memory we start with that.
++  // See int_stk_helper for a further discussion.
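++  // Rough sketch of the mapping implemented below (this port's LP64 C ABI):
++  // the first eight integer/pointer arguments go in A0..A7, the first eight
++  // FP arguments go in FA0..FA7, FP arguments that no longer fit fall back
++  // to the remaining integer registers, and anything left is given two
++  // 32-bit stack slots per value.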
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -wordSize); ++ break; ++ case 
T_DOUBLE: ++ __ fst_d(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ st_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
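++  // Note: on this 64-bit port a long can in fact sit in a single integer
++  // register, which is why the register cases below reduce to plain 64-bit
++  // moves, loads and stores.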
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
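++    // The loop below reserves one VMRegImpl slot per 32-bit register
++    // argument and two per 64-bit one (T_LONG, T_DOUBLE, and T_ARRAY, which
++    // is passed as a pointer); the running slot count is then 2-slot aligned
++    // whenever any 64-bit values were seen.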
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. ++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++#ifdef COMPILER1 ++ if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { ++ // Object.hashCode can pull the hashCode from the header word ++ // instead of doing a full VM transition once it's been computed. 
++ // Since hashCode is usually polymorphic at call sites we can't do ++ // this optimization at the call site without a lot of work. ++ Label slowCase; ++ Register receiver = T0; ++ Register result = V0; ++ __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); ++ // check if locked ++ __ andi(AT, result, markOopDesc::unlocked_value); ++ __ beq(AT, R0, slowCase); ++ if (UseBiasedLocking) { ++ // Check if biased and fall through to runtime if so ++ __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); ++ __ bne(AT, R0, slowCase); ++ } ++ // get hash ++ __ li(AT, markOopDesc::hash_mask_in_place); ++ __ andr (AT, result, AT); ++ // test if hashCode exists ++ __ beq (AT, R0, slowCase); ++ __ shr(result, markOopDesc::hash_shift); ++ __ jr(RA); ++ __ bind (slowCase); ++ } ++#endif // COMPILER1 ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. 
This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
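++  // The loop below queues (java_arg, c_arg) index pairs from the last
++  // argument backwards; the shuffle loop then consumes them two at a time.
++  // For critical natives a cycle-free move order would be computed instead
++  // (left as Unimplemented() in this port).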
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. 
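++  // When the method is static (and not a critical native), the code below
++  // handlizes the class mirror: the mirror oop is stored into the frame's
++  // klass slot, recorded in the oop map, and the address of that slot is
++  // passed as the second C argument (the jclass).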
++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least 
significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, T5); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
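++    // The fast lock path above stores zero into the lock box for a
++    // recursive lock; in that case there is nothing to write back to the
++    // object's mark word, so unlocking is already done.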
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
++                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
++                                            oop_maps);
++
++  if (is_critical_native) {
++    nm->set_lazy_critical_native(true);
++  }
++  return nm;
++}
++
++#ifdef HAVE_DTRACE_H
++// ---------------------------------------------------------------------------
++// Generate a dtrace nmethod for a given signature.  The method takes arguments
++// in the Java compiled code convention, marshals them to the native
++// abi and then leaves nops at the position you would expect to call a native
++// function.  When the probe is enabled the nops are replaced with a trap
++// instruction that dtrace inserts and the trace will cause a notification
++// to dtrace.
++//
++// The probes are only able to take primitive types and java/lang/String as
++// arguments.  No other java types are allowed.  Strings are converted to utf8
++// strings so that from dtrace's point of view java strings are converted to C
++// strings.  There is an arbitrary fixed limit on the total space that a method
++// can use for converting the strings (256 chars per string in the signature).
++// So any java string larger than this is truncated.
++
++static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
++static bool offsets_initialized = false;
++
++static VMRegPair reg64_to_VMRegPair(Register r) {
++  VMRegPair ret;
++  if (wordSize == 8) {
++    ret.set2(r->as_VMReg());
++  } else {
++    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
++  }
++  return ret;
++}
++
++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
++                                                methodHandle method) {
++
++  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
++  // be single threaded in this method.
++  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
++
++  // Fill in the signature array, for the calling-convention call.
++  int total_args_passed = method->size_of_parameters();
++
++  BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
++  VMRegPair *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
++
++  // The signature we are going to use for the trap that dtrace will see
++  // java/lang/String is converted.  We drop "this" and any other object
++  // is converted to NULL.  (A one-slot java/lang/Long object reference
++  // is converted to a two-slot long, which is why we double the allocation).
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ ++ __ jump_to(ic_miss, 0); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
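The `round_to(stack_slots, 4 * VMRegImpl::slots_per_word)` in this hunk is an ordinary round-up to a power-of-two multiple so the frame size keeps SP properly aligned; a minimal sketch of the arithmetic (names are illustrative):

    #include <cassert>

    // Round x up to the next multiple of a power-of-two alignment, as in
    // round_to(stack_slots, 4 * slots_per_word) above.
    inline int round_up(int x, int alignment) {
      assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
      return (x + alignment - 1) & ~(alignment - 1);
    }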
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ mov(G0, tmp); ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
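The unboxing branch above loads the primitive payload from a fixed offset inside the box object and substitutes zero for a null reference; a standalone sketch with an invented box layout (the real offset comes from java_lang_boxing_object::value_offset_in_bytes):

    // Hypothetical layout; only the idea of "payload at a fixed offset" matters.
    struct BoxedLong {
      void* header;  // stand-in for the object header
      long  value;   // primitive payload
    };

    // Mirrors the br_null path: a null box unboxes to 0, otherwise the
    // payload is loaded from its fixed offset.
    long unbox_or_zero(const BoxedLong* box) {
      return box == nullptr ? 0 : box->value;
    }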
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
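The freg -> reg path above has no direct move available, so it spills the float to the conversion temp slot and reloads it as an integer; in portable C++ the same bit move is a memcpy (sketch only):

    #include <cstdint>
    #include <cstring>

    // Equivalent of the stf/ld pair through the conversion temp: transfer the
    // raw bits of a float into an integer register without converting the value.
    uint32_t float_bits_via_memory(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      return bits;
    }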
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ add_d(FP, L2_string_off, O1); ++ __ br_null(O0, false, Assembler::pn, skip); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ restore(); ++ __ ret(); ++ ++ __ flush(); ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
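The deopt blob keeps one body and several entry points that differ only in the unpack reason recorded in S3 before falling into the shared `cont` path; schematically (function names are illustrative, the Unpack_* names are the ones used in the code):

    enum UnpackKind { Unpack_deopt, Unpack_exception, Unpack_reexecute, Unpack_uncommon_trap };

    // Shared continuation: save registers, call fetch_unroll_info,
    // then build the skeletal interpreter frames.
    static void unpack_common(UnpackKind reason) { (void)reason; /* ... */ }

    void deopt_entry()     { unpack_common(Unpack_deopt); }
    void reexecute_entry() { unpack_common(Unpack_reexecute); }
    void exception_entry() { unpack_common(Unpack_exception); }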
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. 
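Because the throwing pc cannot be loaded before the register save without clobbering a register, it is parked in the JavaThread and only patched into the frame's return-address slot afterwards (the ld_ptr/st_ptr pair just below); a minimal model of that hand-off (struct and names are illustrative):

    // Illustrative stand-in for the exception_oop / exception_pc fields of JavaThread.
    struct ExceptionState {
      void* exception_oop = nullptr;
      void* exception_pc  = nullptr;
    };

    // After the registers are saved it is safe to consume the stashed pc:
    // it becomes the frame's return address and the field is cleared.
    void patch_return_pc(void** saved_ra_slot, ExceptionState& state) {
      *saved_ra_slot     = state.exception_pc;
      state.exception_pc = nullptr;
    }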
++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. 
Everything else is either dead or captured ++ // in the vframeArray. ++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). 
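The frame-pushing loop above walks the UnrollBlock's parallel arrays of frame sizes and return pcs, carving one skeletal interpreter frame per entry out of the stack; a simulation of the pointer arithmetic (plain integers instead of real stack memory, names are illustrative):

    #include <cstddef>
    #include <cstdint>

    struct SkeletalFrame { uintptr_t fp; uintptr_t return_pc; };

    // Each iteration "pushes" pc and fp by hand (2 words) and then extends the
    // frame by the rest of its size, mirroring push2 + move(FP, SP) + sub_d.
    void unroll_frames(uintptr_t sp, const uintptr_t* sizes, const uintptr_t* pcs,
                       int count, SkeletalFrame* out) {
      const size_t word = sizeof(uintptr_t);
      for (int i = 0; i < count; i++) {
        sp -= 2 * word;                 // room for return pc and saved fp
        uintptr_t fp = sp;              // new frame pointer
        sp -= sizes[i] - 2 * word;      // remainder of the skeletal frame
        out[i] = { fp, pcs[i] };
      }
    }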
Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. 
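The `li(AT, -(StackAlignmentInBytes)); andr(SP, SP, AT)` idiom used above rounds SP down to the ABI alignment: AND-ing with the negative of a power of two clears the low bits. Sketch:

    #include <cstdint>

    // Round a stack pointer down to a power-of-two alignment;
    // sp & -alignment is the same operation the andr performs.
    inline uintptr_t align_down(uintptr_t sp, uintptr_t alignment) {
      return sp & ~(alignment - 1);
    }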
++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T4, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T4, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. 
++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, reg_save.ra_offset()); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, reg_save.ra_offset()); ++ __ bne(AT, TSR, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_safepoint_poll() ++ __ ld_wu(AT, TSR, 0); ++ __ push(T5); ++ __ li(T5, 0xffc0001f); ++ __ andr(AT, AT, T5); ++ __ li(T5, 0x28800013); ++ __ xorr(AT, AT, T5); ++ __ pop(T5); ++ __ bne(AT, R0, bail); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, reg_save.ra_offset()); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. 
code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,4804 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ //-22 [ F31 ] ++ // ... ++ //-15 [ F24 ] ++ //-14 [ S8 ] ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. 
to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ S8_off = -14, ++ F24_off = -15, ++ F25_off = -16, ++ F26_off = -17, ++ F27_off = -18, ++ F28_off = -19, ++ F29_off = -20, ++ F30_off = -21, ++ F31_off = -22, ++ total_off = F31_off, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! ++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, 
Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ __ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. 
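The parameter-copy loop above walks the Java argument array from the last element down while the stack index counts up, matching the "copy in reverse order (receiver last)" comment; the effect in plain C++ (an editor's sketch, not the stub itself):

    #include <cstdint>

    // dest[j] receives params[count - 1 - j]: arguments land on the
    // expression-stack area in reverse order.
    void copy_args_reversed(const uint64_t* params, int count, uint64_t* dest) {
      for (int j = 0; j < count; j++) {
        dest[j] = params[count - 1 - j];
      }
    }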
++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. 
If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ lipc(AT, L_jtab1); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ __ bind(L_jtab1); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, 
to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ lipc(AT, L_jtab2); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ __ bind(L_jtab2); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ 
__ jr(RA); ++ } ++ ++ // disjoint large copy lsx ++ void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A0, 15); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 16); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ vld(FT4, A0, 64); ++ __ vld(FT5, A0, 80); ++ __ vld(FT6, A0, 96); ++ __ vld(FT7, A0, 112); ++ __ addi_d(A0, A0, 128); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ vst(FT4, A5, 64); ++ __ vst(FT5, A5, 80); ++ __ vst(FT6, A5, 96); ++ __ vst(FT7, A5, 112); ++ __ addi_d(A5, A5, 128); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ addi_d(A0, A0, 64); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ addi_d(A0, A0, 32); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, lt16); ++ __ vld(FT0, A0, 0); ++ __ vst(FT0, A5, 0); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lasx ++ void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A0, 31); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 32); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -256); ++ __ bgeu(A0, A4, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ xvld(FT4, A0, 128); ++ __ xvld(FT5, A0, 160); ++ __ xvld(FT6, A0, 192); ++ __ xvld(FT7, A0, 224); ++ __ addi_d(A0, A0, 256); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ xvst(FT4, A5, 128); ++ __ xvst(FT5, A5, 160); ++ __ xvst(FT6, A5, 192); ++ __ xvst(FT7, A5, 224); ++ __ addi_d(A5, A5, 256); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ addi_d(A0, A0, 128); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ addi_d(A5, A5, 128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ addi_d(A0, A0, 64); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, lt32); ++ __ xvld(FT0, A0, 0); ++ __ xvst(FT0, A5, 0); ++ ++ __ bind(lt32); ++ 
__ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A2, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lsx ++ void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A2, 15); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ vld(FT4, A2, -80); ++ __ vld(FT5, A2, -96); ++ __ vld(FT6, A2, -112); ++ __ vld(FT7, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ vst(FT4, A5, -80); ++ __ vst(FT5, A5, -96); ++ __ vst(FT6, A5, -112); ++ __ vst(FT7, A5, -128); ++ __ addi_d(A5, A5, -128); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, lt16); ++ __ vld(FT0, A2, -16); ++ __ vst(FT0, A5, -16); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ 
vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lasx ++ void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A2, 31); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 256); ++ __ bgeu(A4, A2, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ xvld(FT4, A2, -160); ++ __ xvld(FT5, A2, -192); ++ __ xvld(FT6, A2, -224); ++ __ xvld(FT7, A2, -256); ++ __ addi_d(A2, A2, -256); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ xvst(FT4, A5, -160); ++ __ xvst(FT5, A5, -192); ++ __ xvst(FT6, A5, -224); ++ __ xvst(FT7, A5, -256); ++ __ addi_d(A5, A5, -256); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ addi_d(A5, A5, -128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, lt32); ++ __ xvld(FT0, A2, -32); ++ __ xvst(FT0, A5, -32); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. 
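Before the per-element jump tables that follow, one detail of the six large-copy loops just generated is worth spelling out: the first and last word (or vector) of the source are loaded before the main loop and stored after it, so the source-aligned bulk loop never needs special cases for a ragged head or tail. A minimal C++ sketch of the forward (disjoint) variant, assuming at least 8 bytes to copy (which the small-copy thresholds guarantee); the function name is ours, not the patch's:

#include <cstddef>
#include <cstdint>
#include <cstring>

void large_copy_sketch(uint8_t* dst, const uint8_t* src, size_t nbytes) {
  uint64_t head, tail;                      // A6/A7 (or F0/F1) in the stubs
  std::memcpy(&head, src, 8);               // first source word, read up front
  std::memcpy(&tail, src + nbytes - 8, 8);  // last source word, read up front
  size_t off = 8 - ((uintptr_t)src & 7);    // first 8-byte-aligned source offset
  for (; off + 8 <= nbytes; off += 8)       // aligned middle; the stubs unroll this
    std::memcpy(dst + off, src + off, 8);   //  8x and widen it for LSX/LASX
  std::memcpy(dst, &head, 8);               // ragged head, written last
  std::memcpy(dst + nbytes - 8, &tail, 8);  // ragged tail (may overlap the loop's stores)
}

The conjoint variants apply the same idea while walking backwards from the end of the arrays.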
++ void generate_byte_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ ++ if (!UseLSX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ ld_d(AT, A0, 0); ++ __ ld_b(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_b(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 5); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 5); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ if (!UseLASX) ++ return; ++ ++ // 17: ++ __ vld(F0, A0, 0); ++ __ ld_b(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_b(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 18: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 19: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 20: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); 
++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 21: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 13); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 22: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 23: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 24: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 25: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 9); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 9); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 26: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 27: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 11); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 11); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 28: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 29: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 13); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 30: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 31: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 15); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 32: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). 
++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. 
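The small-copy stubs (generate_byte_small_copy above, generate_short_small_copy below) dispatch on the raw element count through a computed jump: every case body is padded with nops to a fixed 32-byte slot, that is eight 4-byte instructions, so the target address is simply the table base plus count * 32, which is exactly what the lipc / slli_d(A2, A2, 5) / add_d / jr sequence computes. A one-line equivalent, with a name of our choosing:

#include <cstddef>
#include <cstdint>

// Each jump-table case occupies a fixed 32-byte slot, so dispatch is a shift and add.
static inline const void* small_copy_entry(const void* table_base, size_t count) {
  return static_cast<const uint8_t*>(table_base) + (count << 5);  // count * 32 bytes
}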
++ void generate_short_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
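The entry stubs just generated all make the same two decisions before tail-calling a copy loop, so a compilable summary may help (function and enum names are ours): the conjoint variants first run array_overlap_test, where a forward copy is safe whenever the unsigned distance from source to destination is at least the byte count, and every variant then routes small counts to the jump table, rescales the count to bytes where needed, and prefers the 8-byte-aligned bulk loop when one was bound and both pointers qualify.

#include <cstddef>
#include <cstdint>

// Mirrors array_overlap_test: slli_d(T4, A2, log2); sub_d(AT, A1, A0); bgeu(AT, T4, ...).
// If dst is below src the subtraction wraps to a large unsigned value, which is safe.
bool forward_copy_is_safe(const void* src, const void* dst,
                          size_t count, int log2_elem_size) {
  return (uintptr_t)dst - (uintptr_t)src >= (count << log2_elem_size);
}

enum CopyPath { SMALL, LARGE, LARGE_ALIGNED };

// Mirrors the sltui/bnez threshold test and the orr/andi(T0, T0, 7)/beqz alignment gate.
// The real stubs also rescale count to bytes with slli_d before jumping to the bulk loop.
CopyPath choose_copy_path(const void* src, const void* dst, size_t count,
                          size_t small_limit, bool have_aligned_variant) {
  if (count < small_limit) return SMALL;     // 9, 17 or 33, depending on LSX/LASX
  bool both_8_byte_aligned = (((uintptr_t)src | (uintptr_t)dst) & 7) == 0;
  return (have_aligned_variant && both_8_byte_aligned) ? LARGE_ALIGNED : LARGE;
}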
++ void generate_int_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 6: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 7: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, int log2_elem_size, bool dest_uninitialized = false) { ++ Label post, _large; ++ DecoratorSet decorators = 0; ++ BarrierSetAssembler *bs = NULL; ++ ++ if (is_oop) { ++ decorators = IN_HEAP | IS_ARRAY; ++ ++ if (disjoint) { ++ decorators |= ARRAYCOPY_DISJOINT; ++ } ++ ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2, RegSet()); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); ++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, (1 << (log2_elem_size + 1)) - 1); ++ if (is_oop) { 
++ Label skip; ++ __ bnez(T0, skip); ++ __ bl(large_aligned); ++ __ b(post); ++ __ bind(skip); ++ } else { ++ __ beqz(T0, large_aligned); ++ } ++ } ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
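gen_maybe_oop_copy above wraps the same small/large dispatch with GC barrier work when is_oop is set: the argument registers are spilled, BarrierSetAssembler::arraycopy_prologue runs, the copy is performed as a call (bl) rather than a tail jump, and arraycopy_epilogue then runs on the original element count. The outline below only illustrates that bracketing; the hook struct and function names are placeholders, not HotSpot API.

#include <cstddef>

struct BarrierHooks {                              // stand-in for BarrierSetAssembler
  void (*prologue)(void** dst, size_t count);      // e.g. pre-write barrier work
  void (*epilogue)(void** dst, size_t count);      // e.g. card marking
};

void oop_copy_sketch(void** src, void** dst, size_t count,
                     const BarrierHooks& bs,
                     void (*do_copy)(void**, void**, size_t)) {
  bs.prologue(dst, count);     // may clobber arguments, hence the A0..A2/RA spills above
  do_copy(src, dst, count);    // jump table or bulk loop, chosen as in the entry stubs
  bs.epilogue(dst, count);     // runs on the destination and the original element count
}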
++ void generate_long_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 4: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes scratch1, scratch2. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Register tmp1, ++ Register tmp2, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // A3 - size_t ckoff (super_check_offset) ++ // A4 - oop ckval (super_klass) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elementscount ++ const Register ckoff = A3; // super_check_offset ++ const Register ckval = A4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(A0, A4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (S0, S1, S2, S3 are save-on-entry) ++ const Register copied_oop = S0; // actual oop copied ++ const Register count_save = S1; // orig elementscount ++ const Register start_to = S2; // destination array start address ++ const Register oop_klass = S3; // oop._klass ++ const Register tmp1 = A5; ++ const Register tmp2 = A6; ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, oop_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ ++ // Caller of this entry point must set up the argument registers. ++ __ block_comment("Entry:"); ++ ++ // Empty array: Nothing to do. 
++ __ beqz(count, L_done); ++ ++ __ push(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ move(count_save, count); ++ ++ // Copy from low to high addresses ++ __ move(start_to, to); // Save destination array start address ++ __ b(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (; count != 0; count--) { ++ // copied_oop = load_heap_oop(from++); ++ // ... generate_type_check ...; ++ // store_heap_oop(to++, copied_oop); ++ // } ++ __ align(OptoLoopAlignment); ++ ++ __ bind(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop ++ __ addi_d(to, to, UseCompressedOops ? 4 : 8); ++ __ addi_d(count, count, -1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ bind(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop ++ __ addi_d(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(oop_klass, copied_oop); // query the object klass ++ generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); ++ // ======== end loop ======== ++ ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub_d(tmp1, count_save, count); // K = partially copied oop count ++ __ nor(count, tmp1, R0); // report (-1^K) to caller ++ __ beqz(tmp1, L_done_pop); ++ ++ __ bind(L_do_card_marks); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ __ bind(L_done); ++ __ move(A0, count); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
++ // ++ address generate_unsafe_copy(const char *name) { ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ Register s = A0, d = A1, count = A2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ orr(AT, s, d); ++ __ orr(AT, AT, count); ++ ++ __ andi(AT, AT, BytesPerLong-1); ++ __ beqz(AT, L_long_aligned); ++ __ andi(AT, AT, BytesPerInt-1); ++ __ beqz(AT, L_int_aligned); ++ __ andi(AT, AT, BytesPerShort-1); ++ __ beqz(AT, L_short_aligned); ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_short_aligned); ++ __ srli_d(count, count, LogBytesPerShort); // size => short_count ++ __ b(StubRoutines::_jshort_arraycopy); ++ __ bind(L_int_aligned); ++ __ srli_d(count, count, LogBytesPerInt); // size => int_count ++ __ b(StubRoutines::_jint_arraycopy); ++ __ bind(L_long_aligned); ++ __ srli_d(count, count, LogBytesPerLong); // size => long_count ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (A0) ++ Register src_pos, // source position (A1) ++ Register dst, // destination array oo (A2) ++ Register dst_pos, // destination position (A3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ assert_different_registers(SCR1, temp); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, src_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, dst_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ move(src_pos, src_pos); ++ __ move(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // A0 - src oop ++ // A1 - src_pos (32-bits) ++ // A2 - dst oop ++ // A3 - dst_pos (32-bits) ++ // A4 - element count (32-bits) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name) { ++ Label L_failed, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = A0; // source array oop ++ const Register src_pos = A1; // source position ++ const Register dst = A2; // destination array oop ++ const Register dst_pos = A3; // destination position ++ const Register length = A4; ++ ++ // Registers used as temps ++ const Register dst_klass = A5; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ ++#ifndef PRODUCT ++ // bump this on entry, not on exit: ++ __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. 
++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if (src == NULL) return -1; ++ __ beqz(src, L_failed); ++ ++ // if (src_pos < 0) return -1; ++ __ blt(src_pos, R0, L_failed); ++ ++ // if (dst == NULL) return -1; ++ __ beqz(dst, L_failed); ++ ++ // if (dst_pos < 0) return -1; ++ __ blt(dst_pos, R0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = T0; // elements count to copy ++ const Register scratch_src_klass = T1; // array klass ++ const Register lh = T2; // layout helper ++ const Register tmp1 = T3; ++ const Register tmp2 = T4; ++ ++ // if (length < 0) return -1; ++ __ move(scratch_length, length); // length (elements count, 32-bits value) ++ __ blt(scratch_length, R0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ __ block_comment("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(SCR2, dst); ++ __ beqz(SCR2, L1); // this would be broken also ++ __ block_comment("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(lh, Address(scratch_src_klass, lh_offset)); ++ __ li(SCR1, objArray_lh); ++ __ xorr(SCR2, lh, SCR1); ++ __ beqz(SCR2, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(SCR2, dst); ++ __ xorr(SCR2, SCR2, scratch_src_klass); ++ __ bnez(SCR2, L_failed); ++ ++ // if (!src->is_Array()) return -1; ++ __ bge(lh, R0, L_failed); // i.e. (lh >= 0) ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
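The layout-helper comment above compresses a lot of information; a hypothetical decoder (ours, not part of the patch) makes the field boundaries the generic stub relies on explicit. Array layout helpers are negative because the tag occupies the top bits, which is why bge(lh, R0, L_failed) works as the is-array test.

#include <cstdint>

struct LayoutHelper { unsigned tag; int header_bytes; int etype; int log2_esize; };

static inline LayoutHelper decode_layout_helper(int32_t lh) {
  LayoutHelper d;
  d.tag          = (uint32_t)lh >> 30;  // 0x3 = typeArray, 0x2 = objArray
  d.header_bytes = (lh >> 16) & 0xff;   // offset of element 0 from the array oop
  d.etype        = (lh >> 8)  & 0xff;   // BasicType of the elements
  d.log2_esize   = lh & 0xff;           // 0..3 for primitive arrays
  return d;
}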
++#ifdef ASSERT ++ { ++ __ block_comment("assert primitive array {"); ++ Label L; ++ __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ bge(lh, SCR2, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ __ block_comment("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register scr1_offset = SCR1; // array offset ++ const Register elsize = lh; // element size ++ ++ __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask+1) - 1, ++ Klass::_lh_header_size_shift); // array_offset ++ __ add_d(src, src, scr1_offset); // src array offset ++ __ add_d(dst, dst, scr1_offset); // dst array offset ++ __ block_comment("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ bind(L_copy_bytes); ++ __ andi(tmp1, elsize, 2); ++ __ bnez(tmp1, L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_1)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_1)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jshort_arraycopy); ++ ++ __ bind(L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jint_arraycopy); ++ ++ __ bind(L_copy_longs); ++#ifdef ASSERT ++ { ++ __ block_comment("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize ++ __ li(tmp1, LogBytesPerLong); ++ __ beq(elsize, tmp1, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ __ block_comment("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ // ObjArrayKlass ++ __ bind(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(tmp1, dst); ++ __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. 
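When the two object arrays are not identically typed, the code below falls back to StubRoutines::_checkcast_arraycopy, whose return convention is the V0 == 0 / V0 == -1^K scheme documented in the checkcast stub header above. A caller-side decode might look like the helper below; it is purely illustrative and the name is ours. The generic stub's own -1 failure value decodes the same way, to zero elements copied.

#include <cstddef>
#include <cstdint>

// result == 0 means the whole range was stored; otherwise result == ~K,
// where K is the number of leading elements stored before a type check failed.
size_t elements_actually_copied(intptr_t stub_result, size_t requested) {
  if (stub_result == 0) return requested;
  return (size_t)~stub_result;
}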
++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, scratch_length); // length ++ __ bind(L_plain_copy); ++ __ b(StubRoutines::_oop_arraycopy); ++ ++ __ bind(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ ld_w(SCR1, Address(tmp1, lh_offset)); ++ __ li(SCR2, objArray_lh); ++ __ xorr(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, length); // length (reloaded) ++ Register sco_temp = A3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); ++ // assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes SCR1, SCR2 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld_d(dst_klass, Address(dst_klass, ek_offset)); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(A3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_arraycopy. 
++ __ move(A4, dst_klass); // dst.klass.element_klass ++ __ b(StubRoutines::_checkcast_arraycopy); ++ } ++ ++ __ bind(L_failed); ++ __ li(V0, -1); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label disjoint_large_copy_lsx, conjoint_large_copy_lsx; ++ Label disjoint_large_copy_lasx, conjoint_large_copy_lasx; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ Label none; ++ ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); ++ if (UseLSX) { ++ generate_disjoint_large_copy_lsx(disjoint_large_copy_lsx, "disjoint_large_copy_lsx"); ++ generate_conjoint_large_copy_lsx(conjoint_large_copy_lsx, "conjoint_large_copy_lsx"); ++ } ++ if (UseLASX) { ++ generate_disjoint_large_copy_lasx(disjoint_large_copy_lasx, "disjoint_large_copy_lasx"); ++ generate_conjoint_large_copy_lasx(conjoint_large_copy_lasx, "conjoint_large_copy_lasx"); ++ } ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 7, true); ++ } ++ if (UseLASX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy", 9); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy_uninit", 9, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy_uninit", 7, true); ++ } ++ } else { ++ if (UseLASX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy", 5); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy_uninit", 
5, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy", 5); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy_uninit", 5, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, none, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lasx, disjoint_large_copy_lsx, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jint_disjoint_arraycopy", 9); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lasx, conjoint_large_copy_lsx, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jint_arraycopy", 9); ++ } else if (UseLSX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lsx, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lsx, none, "jbyte_arraycopy"); ++ 
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jint_arraycopy", 7); ++ } else { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, none, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, none, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, none, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, none, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, none, "jint_arraycopy", 7); ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "jlong_disjoint_arraycopy", 5); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "jlong_arraycopy", 5); ++ } else if (UseLSX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "jlong_arraycopy", 4); ++ } else { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, none, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, none, "jlong_arraycopy", 4); ++ } ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = 
generate_checkcast_copy("checkcast_arraycopy"); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, 
arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return 
entry; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, ++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) 
& 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of 
hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, 
(intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ 
address generate_updateBytesCRC32C() { ++ assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ld_w(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld_d(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ add_d(V0, A1, R0); ++ __ jr(RA); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
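++ // In outline, each stub built by this generator does the equivalent of:
++ //   save the callee-saved registers (layout below);
++ //   set_last_Java_frame(thread, SP, FP, pc);
++ //   runtime_entry(thread);              // installs the pending exception
++ //   reset_last_Java_frame(thread); restore the saved registers;
++ //   jump to StubRoutines::forward_exception_entry();
++ // (summary sketch of the code that follows)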
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
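++ // The product registers allocated from S0 onward are callee-saved;
++ // save_regs()/restore_regs() below spill precisely those (plus Pm_base)
++ // so the caller's values are preserved while this stub uses them.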
++ } ++ ++ Rhi_mn = ++reg; ++ Rlo_mn = ++reg; ++ } ++ ++ private: ++ void enter() { ++ addi_d(SP, SP, -6 * wordSize); ++ st_d(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addi_d(T0, FP, 6 * wordSize); ++ ld_d(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ st_d(Rhi_ab, FP, 5 * wordSize); ++ st_d(Rlo_ab, FP, 4 * wordSize); ++ st_d(Rhi_mn, FP, 3 * wordSize); ++ st_d(Rlo_mn, FP, 2 * wordSize); ++ st_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld_d(Rhi_ab, FP, 5 * wordSize); ++ ld_d(Rlo_ab, FP, 4 * wordSize); ++ ld_d(Rhi_mn, FP, 3 * wordSize); ++ ld_d(Rlo_mn, FP, 2 * wordSize); ++ ld_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ add_d(t0, t0, Rlo); ++ OR(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ add_d(t1, t1, Rhi); ++ add_d(t1, t1, c); ++ sltu(c, t1, t); ++ add_d(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ slli_w(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld_d(Ra, Pa_base, 0); ++ ldx_d(Rb, Pb_base, Ibn); ++ ld_d(Rm, Pm_base, 0); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
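++ // (acc() above adds the 128-bit product Rhi:Rlo into the triple-precision
++ //  accumulator t2:t1:t0. In C, roughly:
++ //    t0 += Rlo;  carry = (t0 < Rlo);
++ //    t1 += Rhi + carry;  t2 += carry_out_of(t1);      // sketch only
++ //  Each MACC(x, y, t0, t1, t2) of the C model further below is thus a
++ //  mul_d/mulh_du pair followed by one acc() call.)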
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ slli_w(Iam, Rj, LogBytesPerWord); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ stx_d(t0, Pm_base, Iam); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
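++ // In C, approximately:
++ //   while (t0) {
++ //     unsigned long b = 0;                    // running borrow
++ //     for (i = 0; i < len; i++) {
++ //       unsigned long t = (Pm_base[i] < b);
++ //       unsigned long m = Pm_base[i] - b;
++ //       b = (m < Pn_base[i]) | t;
++ //       Pm_base[i] = m - Pn_base[i];
++ //     }
++ //     t0 -= b;
++ //   }
++ // (hand transcription of the assembly below, for reference only)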
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
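++ // (Reminder: 'inv' is the usual Montgomery constant -n[0]^-1 mod 2^64;
++ //  the debug-only check just after save_regs() verifies inv * n[0] == -1.)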
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ ++ // Safefetch stubs. 
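++ // SafeFetch32/SafeFetchN let VM code probe memory that may be unmapped,
++ // e.g. SafeFetch32(adr, -1) yields *adr, or -1 if the load faults; the
++ // fault_pc/continuation_pc recorded below are what the signal handler
++ // uses to resume at the "return errValue" path instead of crashing.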
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. ++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++#endif ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 
0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. 
++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. ++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static juint _crc_table[]; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,2269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. 
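++  // Epilogue: reload the return address saved at SP + 0, release the 18-word
++  // scratch area allocated on entry and return to the caller.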
++  __ ld_d(RA, SP, 0);
++  __ addi_d(SP, SP, 18 * wordSize);
++  __ jr(RA);
++  return entry;
++}
++
++/**
++ * Method entry for static native methods:
++ *   int java.util.zip.CRC32.update(int crc, int b)
++ */
++address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
++  if (UseCRC32Intrinsics) {
++    address entry = __ pc();
++
++    // Rmethod: Method*
++    // Rsender: senderSP must be preserved for slow path
++    // SP: args
++
++    Label slow_path;
++    // If we need a safepoint check, generate full interpreter entry.
++    __ li(AT, SafepointSynchronize::_not_synchronized);
++    __ li(T8, (long)SafepointSynchronize::address_of_state());
++    __ bne(T8, AT, slow_path);
++
++    // We don't generate local frame and don't align stack because
++    // we call stub code and there is no safepoint on this path.
++
++    const Register crc = A0; // crc
++    const Register val = A1; // source java byte value
++    const Register tbl = A2; // scratch
++
++    // Arguments are reversed on java expression stack
++    __ ld_w(val, SP, 0); // byte value
++    __ ld_w(crc, SP, wordSize); // Initial CRC
++
++    __ li(tbl, (long)StubRoutines::crc_table_addr());
++
++    __ nor(crc, crc, R0); // ~crc
++    __ update_byte_crc32(crc, val, tbl);
++    __ nor(crc, crc, R0); // ~crc
++
++    // restore caller SP
++    __ move(SP, Rsender);
++    __ jr(RA);
++
++    // generate a vanilla native entry as the slow path
++    __ bind(slow_path);
++    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
++    return entry;
++  }
++  return NULL;
++}
++
++/**
++ * Method entry for static native methods:
++ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
++ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
++ */
++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
++  if (UseCRC32Intrinsics) {
++    address entry = __ pc();
++
++    // Rmethod: Method*
++    // Rsender: senderSP must be preserved for slow path
++    // SP: args
++
++    Label slow_path;
++    // If we need a safepoint check, generate full interpreter entry.
++    __ li(AT, SafepointSynchronize::_not_synchronized);
++    __ li(T8, (long)SafepointSynchronize::address_of_state());
++    __ bne(T8, AT, slow_path);
++
++    // We don't generate local frame and don't align stack because
++    // we call stub code and there is no safepoint on this path.
++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ ++ const Register crc = A0; // initial crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // len argument to the kernel ++ const Register tmp = A3; ++ ++ const Register end = len; // index of last element to process ++ const Register off = crc; // offset ++ ++ __ ld_w(end, SP, 0); // int end ++ __ ld_w(off, SP, wordSize); // int offset ++ __ sub_w(len, end, off); // calculate length ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // int crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // int crc ++ } ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fabs_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fsqrt_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 1); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 2); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA2, SP, 0); ++ __ fmadd_d(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_s(FA1, SP, Interpreter::stackElementSize); ++ __ fld_s(FA2, SP, 0); ++ __ fmadd_s(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn 
= CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T4, fn); ++ __ jalr(T4); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T4; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. 
++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beqz(FSR, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ b(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ addi_d(T4, T4, 1); ++ __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ld_w(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ st_w(T3, invocation_counter); // save invocation count ++ ++ __ ld_w(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ add_d(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ __ bne_far(AT, R0, *profile_method_continue); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt_far(T3, AT, *profile_method_continue, true /* signed */); ++ } ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm(CompileThreshold, 12)) { ++ __ srli_w(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ __ beq_far(AT, R0, *overflow); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge_far(T3, AT, *overflow, true /* signed */); ++ } ++ ++ __ bind(done); ++ } ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. 
jump to entry_point)
++  // Rmethod - method
++  // RA - return address of interpreter caller
++  // tos - the last parameter to Java method
++  // SP - sender_sp
++
++  // the bcp is valid if and only if it's not null
++  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
++             InterpreterRuntime::frequency_counter_overflow), R0);
++  __ ld_d(Rmethod, FP, method_offset);
++  // Preserve invariant that S0/S7 contain bcp/locals of sender frame
++  __ b_far(do_continue);
++}
++
++// See if we've got enough room on the stack for locals plus overhead.
++// The expression stack grows down incrementally, so the normal guard
++// page mechanism will work for that.
++//
++// NOTE: Since the additional locals are also always pushed (this wasn't
++// obvious in generate_method_entry), the guard should work for them
++// too.
++//
++// Args:
++// T2: number of additional locals this frame needs (what we must check)
++// T0: Method*
++//
++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
++  // see if we've got enough room on the stack for locals plus overhead.
++  // the expression stack grows down incrementally, so the normal guard
++  // page mechanism will work for that.
++  //
++  // Registers live on entry:
++  //
++  // T0: Method*
++  // T2: number of additional locals this frame needs (what we must check)
++
++  // NOTE: since the additional locals are also always pushed (this wasn't obvious in
++  // generate_method_entry), the guard should work for them too.
++  //
++
++  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
++
++  // total overhead size: entry_size + (saved fp thru expr stack bottom).
++  // be sure to change this if you add/subtract anything to/from the overhead area
++  const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize)
++                            + entry_size;
++
++  const int page_size = os::vm_page_size();
++  Label after_frame_check;
++
++  // see if the frame is greater than one page in size. If so,
++  // then we need to verify there is enough stack space remaining
++  // for the additional locals.
++  __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize);
++  __ bge(AT, T2, after_frame_check);
++
++  // compute sp as if this were going to be the last frame on
++  // the stack before the red zone
++#ifndef OPT_THREAD
++  Register thread = T1;
++  __ get_thread(thread);
++#else
++  Register thread = TREG;
++#endif
++
++  // locals + overhead, in bytes
++  __ slli_d(T3, T2, Interpreter::logStackElementSize);
++  __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3
++
++#ifdef ASSERT
++  Label stack_base_okay, stack_size_okay;
++  // verify that thread stack base is non-zero
++  __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset()));
++  __ bne(AT, R0, stack_base_okay);
++  __ stop("stack base is zero");
++  __ bind(stack_base_okay);
++  // verify that thread stack size is non-zero
++  __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset()));
++  __ bne(AT, R0, stack_size_okay);
++  __ stop("stack size is zero");
++  __ bind(stack_size_okay);
++#endif
++
++  // Add stack base to locals and subtract stack size
++  __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT
++  __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base --> T3
++  __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT
++  __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3
++
++  // Use the bigger size for banging.
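++  // (i.e. the larger of the stack shadow zone and the guard zone, so the
++  // headroom check is conservative for either one)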
++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ li(AT, max_bang_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ load_mirror(T0, Rmethod, T4); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size - 2) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T4); ++ __ st_d(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. 
++ // ++ // Rmethod: Method* ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ ++ Label slow_path; ++ const Register local_0 = A0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(local_0, Address(SP, 0)); ++ __ beqz(local_0, slow_path); ++ ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); ++ ++ // areturn ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. 
++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ load_mirror(t, method, T4); ++ // copy mirror into activation frame ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... 
| ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. 
++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. 
++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
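The native-call epilogue above walks the thread through _thread_in_Java, _thread_in_native, and _thread_in_native_trans, issuing a store-release before each state store and a full fence (or a serialization-page write) before polling the safepoint and suspend flags. A minimal standalone model of that ordering, not the VM's actual types; the enum values and flag names are illustrative:

    #include <atomic>
    #include <cstdio>

    // Illustrative thread states; the real values live in the JavaThreadState enum.
    enum ThreadState { thread_in_Java, thread_in_native, thread_in_native_trans };

    std::atomic<ThreadState> state{thread_in_Java};
    std::atomic<bool> safepoint_pending{false};   // stands in for the safepoint poll word
    std::atomic<bool> suspend_requested{false};   // stands in for suspend_flags

    void call_native(void (*fn)()) {
      // store-release: earlier frame-anchor stores must be visible before the state change
      state.store(thread_in_native, std::memory_order_release);
      fn();                                        // the actual JNI call
      state.store(thread_in_native_trans, std::memory_order_release);
      // full fence: the state store must be visible before the poll flags are read
      std::atomic_thread_fence(std::memory_order_seq_cst);
      if (safepoint_pending.load() || suspend_requested.load()) {
        // slow path: cooperate with the VM thread (blocking logic elided in this sketch)
        std::printf("slow path: check_special_condition_for_native_trans\n");
      }
      state.store(thread_in_Java, std::memory_order_release);
    }

    int main() { call_native([]{ std::printf("native body\n"); }); }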
++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ ++ ++ // remove activation ++ __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld_d(RA, FP, frame::java_frame_return_addr_offset * wordSize); // get return address ++ __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. 
of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
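The locals setup above derives LVP from the parameter count (LVP = SP + params*wordSize - wordSize) and then pushes and zeroes (max_locals - params) extra slots below the arguments. A standalone sketch of that arithmetic on an ordinary array standing in for the Java expression stack; the variable names are illustrative:

    #include <cstdint>
    #include <cstdio>

    // Model: sp points at argument word n-1 (lowest address); argument word 0 sits highest.
    int main() {
      const int params = 3, max_locals = 5;        // sizes read from ConstMethod
      intptr_t stack[16] = {0};
      intptr_t* sp  = stack + 8;                   // argument word n-1
      sp[0] = 30; sp[1] = 20; sp[2] = 10;          // args: word 0 (=10) at the highest address
      intptr_t* lvp = sp + params - 1;             // LVP = SP + params*wordSize - wordSize

      // additional locals = max_locals - params, pushed below the args and zero-initialized
      int extra = max_locals - params;
      for (int i = 0; i < extra; i++) { *--sp = 0; }

      // local k is addressed relative to LVP at decreasing addresses
      for (int k = 0; k < max_locals; k++)
        std::printf("local %d = %ld\n", k, (long)*(lvp - k));
    }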
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. 
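generate_counter_incr above bumps the invocation counter in the MethodCounters and branches to invocation_counter_overflow once a threshold is crossed, at which point generate_counter_overflow asks the runtime to compile the method. A conceptual sketch of that check; the limit name and value are illustrative, not the real compilation thresholds:

    #include <cstdio>

    struct MethodCounters { unsigned invocation_counter = 0; };

    // Illustrative limit; the VM derives its own thresholds from the compiler policy flags.
    const unsigned kInterpreterInvocationLimit = 10000;

    // Returns true when the interpreter should call into the runtime to request compilation.
    bool increment_and_check(MethodCounters& mc) {
      mc.invocation_counter += 1;
      return mc.invocation_counter >= kInterpreterInvocationLimit;
    }

    int main() {
      MethodCounters mc;
      for (;;) {
        if (increment_and_check(mc)) {
          std::printf("overflow at %u -> generate_counter_overflow\n", mc.invocation_counter);
          break;
        }
      }
    }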
++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. 
++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::java_frame_return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. 
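The PopFrame handling above talks to the runtime through the thread's popframe_condition word: a processing bit is set while the interpreter is mid-PopFrame, a force-deopt-reexecution bit tells deoptimization to restore the preserved arguments, and the word is reset to popframe_inactive once the fixup is done. A bit-flag sketch of that protocol; the numeric encodings below are illustrative only:

    #include <cstdio>

    // Illustrative encoding of the popframe condition word (the real values differ).
    enum PopframeCondition : unsigned {
      popframe_inactive                    = 0,
      popframe_pending_bit                 = 1u << 0,
      popframe_processing_bit              = 1u << 1,
      popframe_force_deopt_reexecution_bit = 1u << 2,
    };

    int main() {
      unsigned cond = popframe_pending_bit;            // JVMTI requested a PopFrame
      cond |= popframe_processing_bit;                 // interpreter starts handling it

      bool caller_is_interpreted = false;              // outcome of interpreter_contains()
      if (!caller_is_interpreted)
        cond = popframe_force_deopt_reexecution_bit;   // deopt must restore the saved args

      // ... after the frame is popped and the outgoing arguments are fixed up ...
      cond = popframe_inactive;                        // clear the condition flag
      std::printf("final condition = %u\n", cond);
    }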
++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ membar(__ AnyAny); ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal 
template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,4115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. 
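The addressing helpers that follow (iaddress, laddress, at_sp, at_tos) encode the frame's two conventions: locals are reached from LVP at decreasing addresses, with a two-word value occupying the slot of local n+1, while expression-stack elements are reached from SP at increasing addresses with the tos at offset 0. A small model, assuming Interpreter::local_offset_in_bytes(n) and expr_offset_in_bytes(i) resolve to -n*wordSize and i*wordSize on this port:

    #include <cstdint>
    #include <cstdio>

    // Pointer arithmetic below is in words; one word per stack element.
    intptr_t* iaddress(intptr_t* lvp, int n) { return lvp - n; }        // local variable n
    intptr_t* laddress(intptr_t* lvp, int n) { return iaddress(lvp, n + 1); } // 64-bit local
    intptr_t* at_tos(intptr_t* sp, int i)    { return sp + i; }         // expression stack slot i

    int main() {
      intptr_t frame[8] = {0};
      intptr_t* lvp = frame + 7;   // local 0 at the highest address
      intptr_t* sp  = frame + 2;   // pretend two values are on the expression stack
      *iaddress(lvp, 0) = 42;      // istore_0
      *at_tos(sp, 0)    = 7;       // value on top of the stack
      std::printf("local0=%ld tos=%ld laddress(0) is %ld word(s) below LVP\n",
                  (long)*iaddress(lvp, 0), (long)*at_tos(sp, 0),
                  (long)(lvp - laddress(lvp, 0)));
    }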
++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. ++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T4, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T4, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
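sipush above assembles its 16-bit operand from the two bytes after the opcode: the first byte is loaded signed (ld_b), shifted left by 8, and OR-ed with the second byte loaded unsigned (ld_bu), which yields a sign-extended short. A direct C++ rendering of that decode:

    #include <cstdint>
    #include <cstdio>

    // Decode the sipush immediate the same way the stub does: signed high byte, unsigned low byte.
    int32_t sipush_operand(const uint8_t* bcp) {
      int32_t hi = (int8_t)bcp[1];     // ld_b  : sign-extended
      int32_t lo = bcp[2];             // ld_bu : zero-extended
      return (hi << 8) | lo;
    }

    int main() {
      const uint8_t bc_pos[] = {0x11, 0x01, 0x2c};  // sipush 300
      const uint8_t bc_neg[] = {0x11, 0xff, 0xff};  // sipush -1
      std::printf("%d %d\n", sipush_operand(bc_pos), sipush_operand(bc_neg));
    }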
++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ b(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ li(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ li(AT, ConstantPoolCacheEntry::field_index_mask); ++ __ andr(off, flags, AT); ++ __ add_d(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ addi_d(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ ld_d(obj, field); ++ __ push(itos); ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ addi_d(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ fld_s(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ // stos ++ __ ld_h(obj, field); ++ __ push(stos); ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ addi_d(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ // btos ++ __ ld_b(obj, field); ++ __ push(btos); ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ addi_d(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ // ctos ++ __ ld_hu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ addi_d(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ // ztos ++ __ ld_bu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ addi_d(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ // ltos ++ __ ld_d(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ addi_d(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ // dtos ++ __ fld_d(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. 
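ldc and condy_helper above dispatch on the constant-pool tag (or, for a dynamic constant, on the tos state packed into the flags word returned by resolve_ldc) and push the value with the matching type. A plain C++ switch mirroring that control flow; the enum is a stand-in, not the real tag encoding:

    #include <cstdio>

    // Stand-ins for the constant-pool tags the stub compares against.
    enum Tag { UnresolvedClass, UnresolvedClassInError, Class, Float, Integer, Other };

    // Class-like tags go through the VM call, Float/Integer are loaded straight from the
    // pool, and anything else is assumed to be a dynamic constant handled by condy_helper.
    const char* ldc_dispatch(Tag tag) {
      switch (tag) {
        case UnresolvedClass:
        case UnresolvedClassInError:
        case Class:   return "call_ldc -> push atos";
        case Float:   return "load cpool[base + index*8] -> push ftos";
        case Integer: return "load cpool[base + index*8] -> push itos";
        default:      return "condy_helper -> resolve_ldc decides the tos state";
      }
    }

    int main() {
      std::printf("%s\n%s\n%s\n", ldc_dispatch(Integer), ldc_dispatch(Class), ldc_dispatch(Other));
    }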
++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T4); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. ++ Label notNull; ++ __ li(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ ++ // dtos ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ ++ // ltos ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. 
++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. ++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
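iload_internal above implements the pair rewriting described in its comments: depending on the bytecode that follows an _iload, the current bytecode is patched to _fast_iload2, _fast_icaload, or _fast_iload, and it is left alone when the next bytecode is another _iload so that only the last iload of a run gets rewritten. The same decision table as a standalone function; the enum names are illustrative stand-ins for the Bytecodes constants:

    #include <cstdio>

    enum Bytecode { _iload, _caload, _fast_iload, _fast_iload2, _fast_icaload, _other };

    // Returns the bytecode the current _iload should be patched to, or _iload itself
    // when the rewrite is deferred to the last iload of the run.
    Bytecode rewrite_iload(Bytecode next) {
      if (next == _iload)      return _iload;         // wait: another iload follows
      if (next == _fast_iload) return _fast_iload2;   // iload + iload pair
      if (next == _caload)     return _fast_icaload;  // iload + caload pair
      return _fast_iload;                             // plain iload, skip the check next time
    }

    int main() {
      std::printf("%d %d %d %d\n",
                  rewrite_iload(_iload), rewrite_iload(_fast_iload),
                  rewrite_iload(_caload), rewrite_iload(_other));
    }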
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ slli_w(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ bltu(index, AT, ok); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? 
Address::times_4 : Address::times_8) - 1); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 0); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ld_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ fld_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ fld_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. 
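index_check_without_pop above performs the two checks every array bytecode needs: an implicit null check on the array and a single unsigned compare (bltu) of the sign-extended index against the length field, so a negative index fails the same branch as an index that is too large. A C++ sketch of the same logic with plain stand-in types:

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>

    struct ArrayOop { int32_t length; int32_t data[8]; };

    // Mirrors index_check_without_pop: null check, then one unsigned compare covers
    // both index < 0 and index >= length.
    int32_t iaload(const ArrayOop* array, int32_t index) {
      if (array == nullptr) throw std::runtime_error("NullPointerException");
      if ((uint32_t)index >= (uint32_t)array->length)
        throw std::out_of_range("ArrayIndexOutOfBoundsException");
      return array->data[index];
    }

    int main() {
      ArrayOop a{3, {10, 20, 30}};
      std::printf("%d\n", iaload(&a, 2));
      try { iaload(&a, -1); } catch (const std::exception& e) { std::printf("%s\n", e.what()); }
    }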
++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ li(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_iaccess_0); ++ __ li(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aaccess_0); ++ __ li(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_faccess_0); ++ __ li(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ fst_s(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ fst_d(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++} ++ ++// used register T2 ++void 
TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
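++  // The array klass' layout helper carries a bit (layout_helper_boolean_diffbit) that
++  // distinguishes T_BOOLEAN arrays from T_BYTE arrays; when it is set, the value is
++  // masked down to 0/1 before the store (the andi below).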
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push 
d
++  // stack: ..., a, b, c, d, c, d
++  __ load_ptr(4, FSR);   // load b
++  __ store_ptr(2, FSR);  // store b in d
++  __ store_ptr(4, T2);   // store d in b
++  // stack: ..., a, d, c, b, c, d
++  __ load_ptr(5, T2);    // load a
++  __ load_ptr(3, FSR);   // load c
++  __ store_ptr(3, T2);   // store a in c
++  __ store_ptr(5, FSR);  // store c in a
++  // stack: ..., c, d, a, b, c, d
++
++  // stack: ..., c, d, a, b, c, d
++}
++
++// blows FSR
++void TemplateTable::swap() {
++  transition(vtos, vtos);
++  // stack: ..., a, b
++
++  __ load_ptr(1, A5);   // load a
++  __ load_ptr(0, FSR);  // load b
++  __ store_ptr(0, A5);  // store a in b
++  __ store_ptr(1, FSR); // store b in a
++
++  // stack: ..., b, a
++}
++
++void TemplateTable::iop2(Operation op) {
++  transition(itos, itos);
++
++  __ pop_i(SSR);
++  switch (op) {
++    case add  : __ add_w(FSR, SSR, FSR); break;
++    case sub  : __ sub_w(FSR, SSR, FSR); break;
++    case mul  : __ mul_w(FSR, SSR, FSR); break;
++    case _and : __ andr(FSR, SSR, FSR);  break;
++    case _or  : __ orr(FSR, SSR, FSR);   break;
++    case _xor : __ xorr(FSR, SSR, FSR);  break;
++    case shl  : __ sll_w(FSR, SSR, FSR); break;
++    case shr  : __ sra_w(FSR, SSR, FSR); break;
++    case ushr : __ srl_w(FSR, SSR, FSR); break;
++    default   : ShouldNotReachHere();
++  }
++}
++
++// the result is stored in FSR
++// used registers : T2
++void TemplateTable::lop2(Operation op) {
++  transition(ltos, ltos);
++  __ pop_l(T2);
++
++  switch (op) {
++    case add : __ add_d(FSR, T2, FSR); break;
++    case sub : __ sub_d(FSR, T2, FSR); break;
++    case _and: __ andr(FSR, T2, FSR);  break;
++    case _or : __ orr(FSR, T2, FSR);   break;
++    case _xor: __ xorr(FSR, T2, FSR);  break;
++    default  : ShouldNotReachHere();
++  }
++}
++
++// Java requires that idiv handle 0x80000000 / -1 without raising an overflow exception;
++// the result must be 0x80000000.
++// The Godson-2 CPU behaves the same way, so unlike x86 no special handling is needed here.
++void TemplateTable::idiv() {
++  transition(itos, itos);
++  Label not_zero;
++
++  __ bne(FSR, R0, not_zero);
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++  __ bind(not_zero);
++
++  __ pop_i(SSR);
++  __ div_w(FSR, SSR, FSR);
++}
++
++void TemplateTable::irem() {
++  transition(itos, itos);
++  Label not_zero;
++  __ pop_i(SSR);
++
++  __ bne(FSR, R0, not_zero);
++  //__ brk(7);
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++
++  __ bind(not_zero);
++  __ mod_w(FSR, SSR, FSR);
++}
++
++void TemplateTable::lmul() {
++  transition(ltos, ltos);
++  __ pop_l(T2);
++  __ mul_d(FSR, T2, FSR);
++}
++
++// NOTE: division by zero is checked explicitly and jumps to Interpreter::_throw_ArithmeticException_entry
++void TemplateTable::ldiv() {
++  transition(ltos, ltos);
++  Label normal;
++
++  __ bne(FSR, R0, normal);
++
++  //__ brk(7); //generate FPE
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++
++  __ bind(normal);
++  __ pop_l(A2);
++  __ div_d(FSR, A2, FSR);
++}
++
++// NOTE: division by zero is checked explicitly and jumps to Interpreter::_throw_ArithmeticException_entry
++void TemplateTable::lrem() {
++  transition(ltos, ltos);
++  Label normal;
++
++  __ bne(FSR, R0, normal);
++
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++
++  __ bind(normal);
++  __ pop_l(A2);
++
++  __ mod_d(FSR, A2, FSR);
++}
++
++// result in FSR
++// used registers : T0
++void TemplateTable::lshl() {
++  transition(itos, ltos);
++  __ pop_l(T0);
++  __ sll_d(FSR, T0, FSR);
++}
++
++// used registers : T0
++void TemplateTable::lshr() {
++  transition(itos, ltos);
++  __ pop_l(T0);
++  __ sra_d(FSR, T0, FSR);
++}
++
++// used registers : T0
++void TemplateTable::lushr() {
++  transition(itos, ltos);
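++  // The int shift count is in FSR; the long value is popped into T0 below. srl_d only uses
++  // the low six bits of the shift register, which matches lushr's (s & 0x3f) semantics.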
++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = 
ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. 
++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
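++        // Method::method_data_offset() yields the MethodData* (MDO); it is NULL until the
++        // profiler allocates it, in which case we fall through and bump the backedge counter
++        // in the MethodCounters instead.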
++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ ld_w(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ st_w(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ ld_w(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ add_d(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T1, AT, dispatch); ++ } ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T2, AT, dispatch); ++ } ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge(T1, AT, backedge_counter_overflow); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
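++      // This slow path calls InterpreterRuntime::profile_method to allocate the MethodData,
++      // then recomputes the method data pointer for the current bcp before re-dispatching.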
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_b(T3, V0, nmethod::state_offset()); ++ __ li(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // found? 
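++  // (The key in FSR was byte-swapped once up front, so it can be compared directly against
++  //  the big-endian match word just loaded.)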
++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
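++  // The loop invariant only guarantees a[i] <= key < a[i+1] (or that key precedes the whole
++  // table), so the match word at index i is reloaded and compared once more; on a mismatch
++  // we fall through to the default offset.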
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ld_w(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld_d(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ li(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ld_b(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T4); ++ __ membar(__ StoreStore); ++ ++ __ jr(T4); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
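++  // get_cache_and_index_and_bytecode_at_bcp loaded the bytecode recorded in the cache entry
++  // into temp; if it already equals the current (non-nofast) bytecode, the entry has been
++  // resolved and the runtime call below is skipped.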
++ int i = (int)code; ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++//END: LA ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T4); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
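++    // JvmtiExport::get_field_access_count_addr() points at a global count of installed
++    // field access watches; a zero count lets us skip the notification path entirely.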
++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, 
notObj); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
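++      // The tos state is read from the cache entry flags below: ltos and dtos values occupy
++      // two expression-stack slots, so the object reference sits one slot further from SP
++      // than in the one-slot cases.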
++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ 
patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
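++    // As with field access watches, a zero modification count short-circuits the JVMTI call;
++    // otherwise the new value is materialized as a jvalue on the stack (the switch below)
++    // so post_field_modification can read it.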
++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), 
FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), 
noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
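++    // recv currently holds the parameter size in stack slots (masked from flags above);
++    // argument_address() converts that slot count into the expression-stack address of the
++    // receiver, which is the bottom-most (first-pushed) outgoing argument.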
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ // It's final, need a null check here! 
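++  // The receiver still needs a null check; after that the MethodData is updated
++  // (final-call and argument-type profiles) and control tail-jumps into the callee via
++  // jump_from_interpreted, so execution never falls through to notFinal from this path.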
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ __ lookup_virtual_method(T2, index, method); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
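++  // Such calls are tagged with the is_forced_virtual bit in the cache entry flags;
++  // if the bit is clear we fall through to the regular interface dispatch below.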
++ Label notObjectMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
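++  // throw_AbstractMethodErrorVerbose expects the receiver klass and the missing Method*
++  // as its two arguments, so they are placed in A1 and A2 before the call.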
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
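++  // A resolved class entry is marked JVM_CONSTANT_Class in the constant pool tags array;
++  // any other tag value sends us to the slow path so the runtime can resolve it first.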
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ ++ __ bind(loop); ++ __ addi_d(T1, T1, -oopSize); ++ __ st_d(R0, T1, sizeof(oopDesc)); ++ __ bne(T1, FSR, loop); // dont clear header ++ } ++ ++ // klass in T3, ++ // initialize object header only. 
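++    // The header consists of the mark word plus the (compressed) klass and klass gap;
++    // with biased locking the prototype mark word comes from the klass, otherwise the
++    // static markOopDesc::prototype() value is stored.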
++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markOopDesc::prototype()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. ++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. 
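++  // With the interpreter profiler enabled, a null operand records the null_seen flag in
++  // the MethodData, so the JIT later knows this checkcast has encountered nulls.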
++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ __ move(FSR, R0); ++ // Come here on failure ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ slli_d(T4, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ add_d(AT, T4, AT); ++ __ ld_d(T4, AT, 0); ++ __ jr(T4); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ alsl_d(SP, AT, SP, Address::times_8 - 1); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} ++#endif // !CC_INTERP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. 
++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_FloatRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_gpr; ++ return base % FloatRegisterImpl::max_slots_per_register == 0; ++ } else { ++ return is_even(value()); ++ } ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
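++// Only the CPU-specific JavaFrameAnchor::_last_Java_fp field is exported below; the
++// type and constant tables are left empty for this port.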
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,397 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define T5 RT5 ++ ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_os_cpu_info(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _features |= 
get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); ++ } ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? 
", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); ++ _features_str = strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. 
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++ UseCRC32CIntrinsics = true; ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { ++ FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,292 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LAM = (1 << 1), ++ CPU_UAL = (1 << 2), ++ CPU_LSX = (1 << 4), ++ CPU_LASX = (1 << 5), ++ CPU_COMPLEX = (1 << 7), ++ CPU_CRYPTO = (1 << 8), ++ CPU_LBT_X86 = (1 << 10), ++ CPU_LBT_ARM = (1 << 11), ++ CPU_LBT_MIPS = (1 << 12), ++ // flags above must follow Linux 
HWCAP ++ CPU_LA32 = (1 << 13), ++ CPU_LA64 = (1 << 14), ++ CPU_FP = (1 << 15), ++ CPU_LLEXC = (1 << 16), ++ CPU_SCDLY = (1 << 17), ++ CPU_LLDBAR = (1 << 18), ++ CPU_CCDMA = (1 << 19), ++ CPU_LLSYNC = (1 << 20), ++ CPU_TGTSYNC = (1 << 21), ++ CPU_ULSYNC = (1 << 22), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ static const char* _features_str; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static void get_processor_features(); ++ static void get_os_cpu_info(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { return _features & CPU_LA32; } ++ static bool is_la64() { return _features & CPU_LA64; } ++ static bool supports_crypto() { return _features & CPU_CRYPTO; } ++ static bool supports_lsx() { return _features & CPU_LSX; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lam() { return _features & CPU_LAM; } ++ static bool supports_llexc() { return _features & CPU_LLEXC; } ++ static bool supports_scdly() { return _features & CPU_SCDLY; } ++ static bool supports_lldbar() { return _features & CPU_LLDBAR; } ++ static bool supports_ual() { return _features & CPU_UAL; } ++ static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return 
!supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static const char* cpu_features() { return _features_str; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,322 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 6 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. 
++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we use T8, T4, T2 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2, t3 = T4; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ ++ Label L_no_such_interface; ++ ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++ // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ ++ __ move(t3, t2); ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, resolved_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, holder_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ // We found a hit, move offset into T4 ++ __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ ++ start_pc = __ pc(); ++ __ li(AT, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ add_d(AT, AT, t2); ++ __ ldx_d(method, t1, AT); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T4: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +--- a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +--- a/src/hotspot/cpu/mips/assembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,759 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", 
"", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +--- a/src/hotspot/cpu/mips/assembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,1789 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
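++// Instructions emitted by this assembler are fixed-width 32-bit words (see
++// InstructionSize below).  Branch offsets are signed 16-bit counts of
++// instruction words relative to the delay slot, which is why offset() computes
++// (entry - pc() - 4) / 4; the CHECK_DELAY machinery further down asserts that a
++// delay slot is filled explicitly via delayed().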
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
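++  // These SPECIAL (R-type) instructions carry special_op (0) in the primary opcode
++  // field and the value below in the function field, bits [5..0]; e.g. daddu(rd, rs, rt)
++  // emits (rs << 21) | (rt << 16) | (rd << 11) | daddu_op via insn_RRRO.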
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "next instruction should not be a delay slot"); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. ++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, 
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
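++// For example, add_s(fd, fs, ft) below expands to
++// emit_long((cop1_op << 26) | (single_fmt << 21) | (ft << 16) | (fs << 11) | (fd << 6) | fadd_op),
++// with the register encodings filled into the ft/fs/fd fields.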
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
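++// These are the multiply-add/subtract forms; e.g. madd_s(fd, fr, fs, ft) expands to
++// emit_long((cop1x_op << 26) | (fr << 21) | (ft << 16) | (fs << 11) | (fd << 6) | madd_s_op).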
++#define INSN_COP1X(r0, r1, r2, r3, op) \
++  { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));}
++  void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) }
++  void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) }
++  void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) }
++  void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) }
++  void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) }
++  void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) }
++  void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) }
++  void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) }
++  void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) }
++  void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) }
++  void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) }
++  void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) }
++#undef INSN_COP1X
++  //COP1X end
++
++  //SPECIAL2
++//R0->encoding() is 0; INSN_S2 is enclosed by {} for ctags.
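The SPECIAL2 helpers that follow all pack a 32-bit MIPS R-type word from the same five fields: major opcode, rs, rt, rd and a function code. A minimal standalone illustration of that packing, with a hypothetical helper name and example register numbers (0x1c and 0x02 are the MIPS32 SPECIAL2 opcode and mul function code):

#include <cstdint>
#include <cstdio>

// Illustrative sketch only -- packs a word the same way INSN_S2 below does:
// op<<26 | rs<<21 | rt<<16 | rd<<11 | funct.
static uint32_t pack_special2(uint32_t rs, uint32_t rt, uint32_t rd, uint32_t funct) {
  const uint32_t special2_op = 0x1c;   // MIPS32 SPECIAL2 major opcode
  return (special2_op << 26) | (rs << 21) | (rt << 16) | (rd << 11) | funct;
}

int main() {
  // mul rd, rs, rt with rs=8, rt=9, rd=10 (register encodings chosen for the example)
  printf("0x%08x\n", (unsigned) pack_special2(8, 9, 10, 0x02));
  return 0;
}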
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
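As the comment above notes, gslq/gssq compute vAddr = sign_extend(offset << 4) + GPR[base], so a byte offset is only encodable when its low 4 bits are zero and the scaled value fits the signed 9-bit field. A minimal sketch of that check (hypothetical helper name, mirroring the asserts in gslq/gssq):

#include <cassert>
#include <cstdint>

// Sketch only: the same pre-encoding checks gslq()/gssq() perform.
// Returns the 9-bit field value that lands in the instruction word.
static int32_t gslq_offset_field(int32_t byte_off) {
  assert((byte_off & 0xF) == 0 && "low 4 bits of the byte offset must be 0");
  int32_t scaled = byte_off >> 4;                  // hardware re-scales by << 4
  assert(scaled >= -256 && scaled <= 255 && "scaled offset must be a signed 9-bit value");
  return scaled & 0x1FF;                           // low(off, 9)
}

For example, a byte offset of 32 encodes as 2, while an offset of 24 fails the alignment check.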
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +--- a/src/hotspot/cpu/mips/assembler_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +--- a/src/hotspot/cpu/mips/bytes_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. 
++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +--- a/src/hotspot/cpu/mips/c2_globals_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
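Returning to the Bytes helpers above: they read in native (MIPS64EL little-endian) order and byte-swap to obtain Java's big-endian values. A minimal portable sketch of the u4 case, assuming a GCC-style __builtin_bswap32 (the surrounding code already relies on GCC inline assembly):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Sketch only: what get_Java_u4() amounts to on a little-endian host --
// an unaligned-safe native load followed by a byte swap.
static uint32_t sketch_get_Java_u4(const unsigned char* p) {
  uint32_t native;
  memcpy(&native, p, sizeof(native));   // safe for any alignment
  return __builtin_bswap32(native);     // little-endian bytes -> big-endian value
}

int main() {
  const unsigned char classfile_magic[] = { 0xCA, 0xFE, 0xBA, 0xBE };
  printf("0x%08x\n", (unsigned) sketch_get_Java_u4(classfile_magic));  // 0xcafebabe
  return 0;
}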
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +--- a/src/hotspot/cpu/mips/c2_init_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +--- a/src/hotspot/cpu/mips/codeBuffer_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +--- a/src/hotspot/cpu/mips/compiledIC_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,151 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +--- a/src/hotspot/cpu/mips/copy_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/copy_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
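As read from emit_to_interp_stub() and set_to_interpreted() above, the to-interpreter stub is a fixed-size island: one instruction-sized slot carrying the static-stub relocation, a NativeMovConstReg that will receive the Method*, and a patchable jump, padded to a 16-byte boundary. The patching order (metadata word first, then the jump target, and only then the call site via set_destination_mt_safe) is what the MT-safety asserts check. The padding itself is a plain power-of-two align-up; a minimal sketch with a hypothetical name standing in for round_to():

// Sketch only: align-up as used for to_interp_stub_size(), where
// alignment is a power of two (16 here).
static int round_to_sketch(int size, int alignment) {
  return (size + alignment - 1) & ~(alignment - 1);
}
// e.g. round_to_sketch(36, 16) == 48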
++ *
++ */
++
++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP
++#define CPU_MIPS_VM_COPY_MIPS_HPP
++
++// Inline functions for memory copy and fill.
++
++// Contains inline asm implementations
++#include OS_CPU_HEADER_INLINE(copy)
++
++// Template for atomic, element-wise copy.
++template <class T>
++static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
++  if (from > to) {
++    while (count-- > 0) {
++      // Copy forwards
++      *to++ = *from++;
++    }
++  } else {
++    from += count - 1;
++    to   += count - 1;
++    while (count-- > 0) {
++      // Copy backwards
++      *to-- = *from--;
++    }
++  }
++}
++
++
++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
++  julong* to = (julong*) tohw;
++  julong v = ((julong) value << 32) | value;
++  while (count-- > 0) {
++    *to++ = v;
++  }
++}
++
++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
++  pd_fill_to_words(tohw, count, value);
++}
++
++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
++  (void)memset(to, value, count);
++}
++
++static void pd_zero_to_words(HeapWord* tohw, size_t count) {
++  pd_fill_to_words(tohw, count, 0);
++}
++
++static void pd_zero_to_bytes(void* to, size_t count) {
++  (void)memset(to, 0, count);
++}
++
++#endif //CPU_MIPS_VM_COPY_MIPS_HPP
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp
+--- a/src/hotspot/cpu/mips/depChecker_mips.cpp 1970-01-01 08:00:00.000000000 +0800
++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp 2024-01-30 10:00:11.844765024 +0800
+@@ -0,0 +1,30 @@
++/*
++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "compiler/disassembler.hpp"
++#include "depChecker_mips.hpp"
++
++// Nothing to do on mips
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp
+--- a/src/hotspot/cpu/mips/depChecker_mips.hpp 1970-01-01 08:00:00.000000000 +0800
++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp 2024-01-30 10:00:11.844765024 +0800
+@@ -0,0 +1,31 @@
++/*
++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
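copy_conjoint_atomic() above chooses the copy direction from the relative position of the two ranges so an overlapping copy never reads an element it has already overwritten. The same rule on plain ints, as a standalone illustrative sketch:

#include <cstddef>
#include <cstdio>

// Sketch only: copy forwards when the source lies above the destination,
// backwards otherwise, so overlapping ranges are handled correctly.
static void conjoint_copy_sketch(const int* from, int* to, size_t count) {
  if (from > to) {
    for (size_t i = 0; i < count; i++) to[i] = from[i];   // forwards
  } else {
    for (size_t i = count; i-- > 0; )  to[i] = from[i];   // backwards
  }
}

int main() {
  int a[5] = {1, 2, 3, 4, 5};
  conjoint_copy_sketch(a, a + 1, 4);    // overlapping shift right by one
  for (int v : a) printf("%d ", v);     // 1 1 2 3 4
  return 0;
}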
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +--- a/src/hotspot/cpu/mips/disassembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +--- a/src/hotspot/cpu/mips/frame_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/frame_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. 
adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is
++  // so that if fp was live on callout from c2 we can find
++  // the saved copy no matter what it called.
++
++  // Since the interpreter always saves fp if we record where it is then
++  // we don't have to always save fp on entry and exit to c2 compiled
++  // code, on entry will be enough.
++  map->set_location(FP->as_VMReg(), (address) link_addr);
++  // this is weird "H" ought to be at a higher address however the
++  // oopMaps seems to have the "H" regs at the same address and the
++  // vanilla register.
++  // XXXX make this go away
++  if (true) {
++    map->set_location(FP->as_VMReg()->next(), (address) link_addr);
++  }
++}
++
++//------------------------------sender_for_compiled_frame-----------------------
++frame frame::sender_for_compiled_frame(RegisterMap* map) const {
++  assert(map != NULL, "map must be set");
++
++  // frame owned by optimizing compiler
++  assert(_cb->frame_size() >= 0, "must have non-zero frame size");
++
++  intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
++  intptr_t* unextended_sp = sender_sp;
++
++  // On Loongson the return_address is always the word on the stack.
++  // In compiled code fp points to the sender's fp, while in the interpreter fp points to the return address,
++  // so getting the sender of a compiled frame is not the same as for an interpreter frame.
++  // we hard code here temporarily
++  // spark
++  address sender_pc = (address) *(sender_sp-1);
++
++  intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
++
++  if (map->update_map()) {
++    // Tell GC to use argument oopmaps for some runtime stubs that need it.
++    // For C1, the runtime stub might not have oop maps, so set this flag
++    // outside of update_register_map.
++    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++    if (_cb->oop_maps() != NULL) {
++      OopMapSet::update_register_map(this, map);
++    }
++
++    // Since the prolog does the save and restore of fp there is no oopmap
++    // for it so we must fill in its location as if there was an oopmap entry
++    // since if our caller was compiled code there could be live jvm state in it.
++    update_map_with_saved_link(map, saved_fp_addr);
++  }
++  assert(sender_sp != sp(), "must have changed");
++  return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
++}
++
++frame frame::sender(RegisterMap* map) const {
++  // Default is we don't have to follow them. The sender_for_xxx will
++  // update it accordingly
++  map->set_include_argument_oops(false);
++
++  if (is_entry_frame()) return sender_for_entry_frame(map);
++  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
++  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
++
++  if (_cb != NULL) {
++    return sender_for_compiled_frame(map);
++  }
++  // Must be native-compiled frame, i.e. the marshaling code for native
++  // methods that exists in the core system.
++  return frame(sender_sp(), link(), sender_pc());
++}
++
++bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
++// QQQ
++#ifdef CC_INTERP
++#else
++  assert(is_interpreted_frame(), "Not an interpreted frame");
++  // These are reasonable sanity checks
++  if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) {
++    return false;
++  }
++  if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) {
++    return false;
++  }
++  if (fp() + interpreter_frame_initial_sp_offset < sp()) {
++    return false;
++  }
++  // These are hacks to keep us out of trouble.
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +--- a/src/hotspot/cpu/mips/frame_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/frame_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP
++#define CPU_MIPS_VM_FRAME_MIPS_HPP
++
++#include "runtime/synchronizer.hpp"
++
++// A frame represents a physical stack frame (an activation). Frames can be
++// C or Java frames, and the Java frames can be interpreted or compiled.
++// In contrast, vframes represent source-level activations, so that one physical frame
++// can correspond to multiple source level frames because of inlining.
++// A frame is comprised of {pc, fp, sp}
++// ------------------------------ Asm interpreter ----------------------------------------
++// Layout of asm interpreter frame:
++// [expression stack ] * <- sp
++// [monitors ] \
++// ... | monitor block size
++// [monitors ] /
++// [monitor block size ]
++// [byte code index/pointer] = bcx() bcx_offset
++// [pointer to locals ] = locals() locals_offset
++// [constant pool cache ] = cache() cache_offset
++// [methodData ] = mdp() mdx_offset
++// [methodOop ] = method() method_offset
++// [last sp ] = last_sp() last_sp_offset
++// [old stack pointer ] (sender_sp) sender_sp_offset
++// [old frame pointer ] <- fp = link()
++// [return pc ]
++// [oop temp ] (only for native calls)
++// [locals and parameters ]
++// <- sender sp
++// ------------------------------ Asm interpreter ----------------------------------------
++
++// ------------------------------ C++ interpreter ----------------------------------------
++//
++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run)
++//
++// <- SP (current sp)
++// [local variables ] BytecodeInterpreter::run local variables
++// ... BytecodeInterpreter::run local variables
++// [local variables ] BytecodeInterpreter::run local variables
++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ]
++// [return pc ] (return to frame manager)
++// [interpreter_state* ] (arg to BytecodeInterpreter::run) --------------
++// [expression stack ] <- last_Java_sp |
++// [... ] * <- interpreter_state.stack |
++// [expression stack ] * <- interpreter_state.stack_base |
++// [monitors ] \ |
++// ... | monitor block size |
++// [monitors ] / <- interpreter_state.monitor_base |
++// [struct interpretState ] <-----------------------------------------|
++// [return pc ] (return to callee of frame manager [1]
++// [locals and parameters ]
++// <- sender sp
++
++// [1] When the c++ interpreter calls a new method it returns to the frame
++// manager which allocates a new frame on the stack. In that case there
++// is no real callee of this newly allocated frame. The frame manager is
++// aware of the additional frame(s) and will pop them as nested calls
++// complete. However, to make it look good in the debugger the frame
++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
++// with a fake interpreter_state* parameter to make it easy to debug
++// nested calls.
++
++// Note that contrary to the layout for the assembly interpreter the
++// expression stack allocated for the C++ interpreter is full sized.
++// However this is not as bad as it seems as the interpreter frame_manager
++// will truncate the unused space on successive method calls.
++//
++// ------------------------------ C++ interpreter ----------------------------------------
++
++// Layout of interpreter frame:
++//
++// [ monitor entry ] <--- sp
++// ...
++// [ monitor entry ]
++// -9 [ monitor block top ] ( the top monitor entry )
++// -8 [ byte code pointer ] (if native, bcp = 0)
++// -7 [ constant pool cache ]
++// -6 [ methodData ] mdx_offset(not core only)
++// -5 [ mirror ]
++// -4 [ methodOop ]
++// -3 [ locals offset ]
++// -2 [ last_sp ]
++// -1 [ sender's sp ]
++// 0 [ sender's fp ] <--- fp
++// 1 [ return address ]
++// 2 [ oop temp offset ] (only for native calls)
++// 3 [ result handler offset ] (only for native calls)
++// 4 [ result type info ] (only for native calls)
++// [ local var m-1 ]
++// ...
++// [ local var 0 ]
++// [ argument word n-1 ] <--- ( sender's sp )
++// ...
++// [ argument word 0 ] <--- S7
++
++ public:
++  enum {
++    pc_return_offset = 0,
++    // All frames
++    link_offset = 0,
++    return_addr_offset = 1,
++    // non-interpreter frames
++    sender_sp_offset = 2,
++
++    // Interpreter frames
++    interpreter_frame_return_addr_offset = 1,
++    interpreter_frame_result_handler_offset = 3, // for native calls only
++    interpreter_frame_oop_temp_offset = 2, // for native calls only
++
++    interpreter_frame_sender_fp_offset = 0,
++    interpreter_frame_sender_sp_offset = -1,
++    // outgoing sp before a call to an invoked method
++    interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
++    interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1,
++    interpreter_frame_method_offset = interpreter_frame_locals_offset - 1,
++    interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1,
++    interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1,
++    interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
++    interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1,
++    interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
++
++    interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
++    interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
++
++    // Entry frames
++    entry_frame_call_wrapper_offset = -9,
++
++    // Native frames
++
++    native_frame_initial_param_offset = 2
++
++  };
++
++  intptr_t ptr_at(int offset) const {
++    return *ptr_at_addr(offset);
++  }
++
++  void ptr_at_put(int offset, intptr_t value) {
++    *ptr_at_addr(offset) = value;
++  }
++
++ private:
++  // an additional field beyond _sp and _pc:
++  intptr_t* _fp; // frame pointer
++  // The interpreter and adapters will extend the frame of the caller.
++  // Since oopMaps are based on the sp of the caller before extension
++  // we need to know that value. However in order to compute the address
++  // of the return address we need the real "raw" sp. Since sparc already
++  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
++  // original sp we use that convention.
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +--- a/src/hotspot/cpu/mips/frame_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ 
swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. 
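Both the array post-barrier loop above and the per-store check below come down to the same card-table arithmetic: shift the heap address right by CardTable::card_shift, index the pre-biased byte_map_base, and store dirty_card_val() (zero, as asserted below). A self-contained sketch of that math, assuming a typical 512-byte card size and using hypothetical helper names:

  #include <cstdint>

  static const unsigned kCardShift = 9;   // 512-byte cards; mirrors CardTable::card_shift
  static const uint8_t  kDirty     = 0;   // mirrors CardTable::dirty_card_val()

  // Dirty the single card covering one updated oop field.
  static inline void dirty_card(volatile uint8_t* byte_map_base, uintptr_t field_addr) {
    byte_map_base[field_addr >> kCardShift] = kDirty;   // base is pre-biased by the heap start
  }

  // Dirty every card covering [start, last], as the L_loop above does for array copies.
  static inline void dirty_cards(volatile uint8_t* byte_map_base,
                                 uintptr_t start, uintptr_t last) {
    for (uintptr_t c = (start >> kCardShift); c <= (last >> kCardShift); c++) {
      byte_map_base[c] = kDirty;
    }
  }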
++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreStore); ++ } ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
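The class comment above describes a simple template-method split: the shared store_at() filters by BasicType and routes only oop stores to a protected virtual hook that each concrete barrier set overrides. Reduced to a standalone sketch (simplified stand-in names, not the HotSpot classes themselves):

  #include <cstdio>

  enum BasicTypeSketch { kInt, kObject, kArray };

  struct BarrierSetAsmSketch {
    virtual ~BarrierSetAsmSketch() {}
    virtual void store_at(BasicTypeSketch t) { std::printf("plain store\n"); }
  };

  struct ModRefBarrierSetAsmSketch : BarrierSetAsmSketch {
    void store_at(BasicTypeSketch t) override {
      if (t == kObject || t == kArray) {
        oop_store_at(t);                       // barrier-specific path
      } else {
        BarrierSetAsmSketch::store_at(t);      // primitives bypass the barrier
      }
    }
  protected:
    virtual void oop_store_at(BasicTypeSketch t) = 0;   // e.g. store plus card mark
  };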
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +--- a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. 
++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +--- a/src/hotspot/cpu/mips/globals_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/globals_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. 
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ 
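ARCH_FLAGS above follows HotSpot's usual X-macro style flag list: the port defines one macro that is expanded elsewhere with different definitions of product/develop/etc. to generate flag declarations, defaults, and documentation from a single source. A generic, self-contained illustration of that idiom (not HotSpot's actual expansion machinery; the flag names here are made up):

  #include <cstdio>

  // One list of flags, expanded several times with different callback macros.
  #define MY_ARCH_FLAGS(product)                                        \
    product(bool, UseFeatureX, false, "Use hypothetical feature X")     \
    product(int,  TuneLevel,   2,     "Hypothetical tuning level")

  // Expansion 1: define a global with its default value for each flag.
  #define DEFINE_FLAG(type, name, value, doc) type name = value;
  MY_ARCH_FLAGS(DEFINE_FLAG)
  #undef DEFINE_FLAG

  // Expansion 2: print each flag's name and documentation string.
  static void print_flag_docs() {
  #define PRINT_FLAG(type, name, value, doc) std::printf("%-12s %s\n", #name, doc);
    MY_ARCH_FLAGS(PRINT_FLAG)
  #undef PRINT_FLAG
  }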
++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp +--- a/src/hotspot/cpu/mips/icache_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/icache_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, A2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +--- a/src/hotspot/cpu/mips/icache_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/icache_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +--- a/src/hotspot/cpu/mips/icBuffer_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. 
this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +--- a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,2126 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. 
++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if 
(JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
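dispatch_base() below implements token-threaded dispatch: the bytecode already loaded into Rnext is scaled by the word size and used to index a per-TosState table of generated entry points, with a detour through the safepoint table when a thread-local poll is armed. The core mechanism, reduced to a function-pointer table in plain C++ (illustrative names only; 0x60 is the standard iadd opcode):

  #include <cstdint>
  #include <cstdio>

  typedef void (*bytecode_handler_t)();

  static void do_nop()  { std::printf("nop\n");  }
  static void do_iadd() { std::printf("iadd\n"); }

  // The interpreter keeps one such table per TosState; a single table is enough here.
  static bytecode_handler_t dispatch_table[256];

  static void init_dispatch_table() {
    for (int i = 0; i < 256; i++) dispatch_table[i] = do_nop;
    dispatch_table[0x60] = do_iadd;            // Bytecodes::_iadd
  }

  // Equivalent of: load the next bytecode, scale by wordSize, indexed jump through the table.
  static void dispatch(const uint8_t* bcp) {
    dispatch_table[*bcp]();
  }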
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// 
++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
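++ // Roughly: check  method->constMethod() + ConstMethod::codes_offset() + mdp->bci()  == BCP,
++ // and let InterpreterRuntime::verify_mdp() report the details on a mismatch.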
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
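++ // header_bits does not fit in a 16-bit immediate here, so materialize it in T8
++ // and OR it into the header word.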
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
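++ // Roughly:  row[start_row].receiver = receiver;  row[start_row].count = DataLayout::counter_increment;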
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
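++ // Roughly (C view of the cases below):
++ //   T_BOOLEAN: result &= 0x1;     T_BYTE:  result = (int8_t)  result;
++ //   T_CHAR:    result &= 0xFFFF;  T_SHORT: result = (int16_t) result;  T_INT: unchanged.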
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
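++ // Roughly:  mdp += tmp * DataLayout::cell_size;  // step over the remaining cells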
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
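++ // Preserve the TOS value across the JVMTI upcall: push(state) saves it and
++ // pop(state) restores it once post_method_exit returns.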
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +--- a/src/hotspot/cpu/mips/interp_masm_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void 
get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +--- a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp 
1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
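++// Hence pass_long() and pass_double() below address the value via
++// Interpreter::local_offset_in_bytes(offset() + 1) rather than offset().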
++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +--- a/src/hotspot/cpu/mips/interpreterRT_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +--- a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
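++    // (A NULL _last_Java_sp marks the whole anchor as invalid: stack walkers and
++    // profilers ignore the remaining fields until sp is set again, which is why it
++    // is cleared first here and written back last in copy() below.)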
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +--- a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); ++ ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +--- a/src/hotspot/cpu/mips/jniTypes_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/macroAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,4257 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "jvm.h"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetAssembler.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "nativeInst_mips.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.inline.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++
++#ifdef COMPILER2
++#include "opto/intrinsicnode.hpp"
++#endif
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++  unsigned int k;
++  for(k=0; k<32; k++) {
++    s->print_cr("i%d = 0x%.16lx", k, i[k]);
++  }
++  s->cr();
++
++  for(k=0; k<32; k++) {
++    s->print_cr("f%d = %f", k, f[k]);
++  }
++  s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ sw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ lw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target) {
++  jint& stub_inst = *(jint*) branch;
++  jint *pc = (jint *)branch;
++
++  if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++    //b_far:
++    //  move(AT, RA); // daddu
++    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
++    //  nop();
++    //  lui(T9, 0); // to be patched
++    //  ori(T9, 0);
++    //  daddu(T9, T9, RA);
++    //  move(RA, AT);
++    //  jr(T9);
++
++    assert(opcode(pc[3]) == lui_op
++        &&
opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op, "Not a branch label patch"); ++ if(!(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } ++ ++ int offset = target - branch; ++ if (!is_simm16(offset)) { ++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); ++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); ++ } else { ++ // revert to "beq + nop" ++ CodeBuffer cb(branch, 4 * 10); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? 
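++// A MIPS j/jal instruction encodes a 26-bit instruction index, so a direct jump
++// can only land inside the current 256 MB (2^28-byte) aligned segment of the
++// address space. fit_in_jal(cl, ch) is therefore expected to check that both
++// ends of the code cache fall into one such segment, in which case j/jal is
++// always safe regardless of where the code gets relocated.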
++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. 
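++// The stub itself is just a word-aligned 64-bit slot in the stub section that
++// holds the destination address: trampoline_call() above materializes the slot's
++// address into T9 (lui/ori/dsll/ld) and calls through jalr(T9), while the
++// trampoline_stub_Relocation records which call instruction the slot belongs to.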
++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be 
patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. 
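++  // (At this point swap_reg holds mark ^ prototype_header ^ thread with the age
++  // bits masked off, so the tests below can inspect the lock and epoch fields of
++  // that xor result individually.)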
++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. FIXME ++// For more info, see class NativeCall. 
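++// (Position-independent MIPS callees recompute gp from t9 in their prologue, so
++// the ABI expects the callee's own entry address in T9 at the time of the call;
++// hence the target is always materialized into T9 and invoked via jalr rather
++// than a direct jal.)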
++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register 
arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
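++// Concretely, the code below computes
++//   offset = (thread >> get_serialize_page_shift_count()) & (vm_page_size() - sizeof(int))
++// and stores an int-sized zero at get_memory_serialize_page() + offset.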
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ lw(AT, AT, 0); ++ addiu(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
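++  // Seven words were pushed just above, so an SP-relative address is rebased
++  // by 7 * wordSize before the load.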
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ ++ move(resflag, newval); ++ sc(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); 
++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. 
++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. 
++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. 
++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ 
ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = 
AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in 
encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 
4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. ++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else 
if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
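++    // Illustrative sketch of the constant-offset fast decision below (comment only,
++    // using the names already in scope; not emitted code):
++    //   loaded = *(sub_klass + super_check_offset);   // a primary-super display slot
++    //   loaded == super_klass  ? goto L_success        // exact depth match
++    //                          : goto L_failure;       // cannot be a subtype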
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. 
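++// Result convention used below: result is 1 when the arrays are equal (same
++// reference, zero count, or all elements match) and 0 otherwise.
++// Hypothetical call from generated stub code (illustration, not part of this file):
++//   arrays_equals(a1, a2, cnt, tmp, res, /* is_char */ true);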
++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? ++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
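++  // Untagged case (sketch): a plain jobject slot holds the oop directly, so it is
++  // loaded from offset 0; the jweak branch above strips the tag by addressing the
++  // slot at -JNIHandles::weak_tag_value before the ON_PHANTOM_OOP_REF load.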
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ 
beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/macroAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. 
++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
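++  // Hypothetical usage (illustration only; the callee and registers are examples):
++  //   call_VM(V0, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), A1);
++  // Up to three register arguments are passed, the VM transition is performed, and
++  // an oop result (if any) is left in oop_result.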
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
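++  // Combined behaviour (sketch; see the matching definition in macroAssembler_mips.cpp):
++  //   fast path: may branch straight to L_success or fall to the slow path;
++  //   slow path: scans the secondary supers and updates the super cache on a hit;
++  //   neither succeeding: control falls through (the failure case).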
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, 
Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void 
store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++public: ++ ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 
&& (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, RT9, 0, type); ++ } else { ++ addu(AT, AT, RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), RT9, 0, type); ++ } else { ++ addu(AT, as_Register(base), RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +--- a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +--- a/src/hotspot/cpu/mips/methodHandles_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
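[Editorial sketch, not part of the patch: the interp_only_mode branch that jump_from_method_handle() emits below can be read as the following plain C++. The struct and field names here are invented stand-ins for Method*/JavaThread state; only the entry-point selection mirrors what the assembly does.]

#include <cstdio>

// Invented stand-ins, for illustration only.
struct FakeMethod { const char* interpreter_entry; const char* from_interpreted; const char* from_compiled; };
struct FakeThread { int interp_only_mode; };

static const char* select_entry(const FakeMethod& m, const FakeThread& t, bool for_compiler_entry) {
  if (!for_compiler_entry && t.interp_only_mode != 0) {
    return m.interpreter_entry;   // JVMTI single-stepping etc.: stay in the interpreter
  }
  return for_compiler_entry ? m.from_compiled : m.from_interpreted;
}

int main() {
  FakeThread stepping{1};
  FakeMethod m{"interpreter_entry", "from_interpreted_entry", "from_compiled_entry"};
  std::printf("%s\n", select_entry(m, stepping, /*for_compiler_entry=*/false));  // interpreter_entry
  return 0;
}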
++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
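[Editorial sketch, not part of the patch: the linkToVirtual case that follows pulls a vtable index out of the MemberName and dispatches through the receiver's class. Conceptually that is just a table lookup, shown here in self-contained C++ with invented types.]

#include <cstdio>
#include <vector>

struct Method { void (*entry)(); };
struct Klass  { std::vector<Method> vtable; };

// Dispatch goes through the receiver's class, not MemberName.clazz.
static Method* lookup_virtual_method(Klass* recv_klass, int vtable_index) {
  return &recv_klass->vtable[vtable_index];
}

int main() {
  Klass k;
  k.vtable.push_back(Method{ []{ std::puts("dispatched"); } });
  lookup_virtual_method(&k, 0)->entry();
  return 0;
}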
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +--- a/src/hotspot/cpu/mips/methodHandles_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +--- a/src/hotspot/cpu/mips/mips_64.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/mips_64.ad 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,12243 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); 
++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def 
F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); 
++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. 
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. 
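[Editorial illustration, not part of the patch: the size_exception_handler()/size_deopt_handler() helpers above round NativeCall::instruction_size up to a 16-byte boundary with mask_bits(size + m, ~m), and the compute_padding() helpers further down use the same mask trick. The arithmetic in self-contained C++:]

#include <cassert>
#include <cstdint>

static uint64_t round_up_16(uint64_t size) {
  const uint64_t m = 16 - 1;
  return (size + m) & ~m;   // same effect as mask_bits(size + m, ~m)
}

int main() {
  assert(round_up_16(24) == 32);  // e.g. a 24-byte sequence pads to 32
  assert(round_up_16(32) == 32);  // already aligned: unchanged
  return 0;
}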
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // TODO ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ // Add rules here. ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. 
++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
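[Editorial aside, not part of the patch: per its own comment, can_be_java_arg() below encodes the MIPS64 Java calling convention — integer arguments in T0 and A0..A7, float arguments in F12..F19. A self-contained membership check over register names, for illustration only:]

#include <cstdio>
#include <set>
#include <string>

static bool is_java_arg_register(const std::string& name) {
  // Mirrors the register checks in can_be_java_arg() below.
  static const std::set<std::string> java_args = {
    "T0", "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7",      // integer args
    "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19"     // float args
  };
  return java_args.count(name) != 0;
}

int main() {
  std::printf("A3: %d, S0: %d\n", is_java_arg_register("A3"), is_java_arg_register("S0"));  // 1, 0
  return 0;
}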
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
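++ // The cases below cover each (source class, destination class) pairing:
++ // stack-to-stack copies bounce through AT with ld/sd (or lw/sw for 32-bit
++ // values), stack loads/stores of GPRs use ld/sd and lw/lwu/sw, FPR slots
++ // use ldc1/sdc1 and lwc1/swc1, GPR-to-GPR copies use move/move_u32/daddu,
++ // FPR-to-FPR copies use mov_d/mov_s, and GPR/FPR transfers go through
++ // dmtc1/dmfc1 (mtc1/mfc1 for 32-bit).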
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], 
++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void 
MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } else { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int 
sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
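++ // patchable_set48 emits a fixed 4-instruction sequence for the 48-bit
++ // absolute address, which is why size() below reports 4 * 4 bytes.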
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. 
Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
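++// Each immediate operand below narrows a ConI/ConL constant with a predicate
++// so the matcher can select instructions whose encodings accept that range
++// directly; for example immI16 only accepts values in [-32768, 32767], the
++// range of a signed 16-bit immediate field, so the constant does not have to
++// be materialized into a register first.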
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand 
immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ 
match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ 
constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] 
@ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ 
predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
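++// Illustrative sketch only: how an instruct below ties back to the pipe_class
++// declarations above. An instruct selects its pipeline class through ins_pipe,
++// so a reg-reg integer op would look roughly like the commented example here.
++// The names 'exampleAddI' and the 'addu' encoding are hypothetical stand-ins,
++// not definitions taken from this port; the real patterns follow below.
++//
++//   instruct exampleAddI(mRegI dst, mRegI src1, mRegI src2) %{
++//     match(Set dst (AddI src1 src2));   // machine-independent subtree it replaces
++//     ins_cost(100);                     // relative cost used by instruction selection
++//     format %{ "addu $dst, $src1, $src2 #@exampleAddI" %}
++//     ins_encode %{
++//       __ addu($dst$$Register, $src1$$Register, $src2$$Register); // hypothetical encoding
++//     %}
++//     ins_pipe( ialu_regI_regI );        // pipe_class No.1 declared above
++//   %}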
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, 
$mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct 
loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ 
sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ 
switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
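++      // With the unordered c_ueq_d, a NaN operand would set the FP condition
++      // bit, so the bc1f-based not_equal branch would not be taken and NaN
++      // would appear equal to itself, breaking the (f != f) idiom. The
++      // ordered c_eq_d is false for NaN, so bc1f correctly takes the
++      // not-equal path.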
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
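++    // A release barrier must keep all earlier loads and stores ordered before
++    // the publishing store. This port only emits the full sync() for that
++    // purpose (every membar rule here uses sync()), so dropping it would
++    // permit such reordering.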
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
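++  // The cmov* rules below all funnel into MacroAssembler::cmp_cmov(op1, op2,
++  // dst, src, condition, flag), where the trailing flag is commented as
++  // is_signed or is_float depending on the operand kind. dst is overwritten
++  // with src only when "op1 <cond> op2" holds, which is why dst also appears
++  // as an input in the (Binary dst src) operand.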
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_result = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_s(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_s(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_d(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_d(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array.
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower than div+mfhi. ++ // So I just disable it here.
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ 
format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, 
$dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ 
sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime::f2i() to do a valid conversion */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An integer array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, 
con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); 
++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShouldNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeLConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. 
++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL 
dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, 
$src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! 
replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! 
replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! 
add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +--- a/src/hotspot/cpu/mips/mips.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/mips.ad 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +--- a/src/hotspot/cpu/mips/nativeInst_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,1821 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ 
//dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( 
is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else 
{ ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
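++  // The code below distinguishes two cases:
++  //  - the destination is reachable from the code cache: the call sequence
++  //    itself is repatched via set_destination(dest);
++  //  - otherwise the call runs through a trampoline stub behind the nmethod,
++  //    and only the destination constant held in that stub is rewritten,
++  //    which keeps the call executable by free-running threads at all times.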
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
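++// patch_on_j_only above (and patch_on_j_gs/patch_on_j below) rely on the
++// MIPS J-type encoding: the jump can only reach targets in the same 256 MB
++// region as the delay-slot instruction, i.e. the upper 36 bits of the target
++// come from PC + 4 and the low 28 bits are the 26-bit index shifted left by 2.
++//
++// Worked example (hypothetical addresses):
++//   j placed at                                  0x000000ffe5815000
++//   PC + 4                                     = 0x000000ffe5815004
++//   region base: (PC + 4) & 0xfffffffff0000000 = 0x000000ffe0000000
++//   dst                                        = 0x000000ffe5900240
++//   index = (dst - region base) >> 2           = 0x1640090   (< (1 << 26), ok)
++//   patched word = (Assembler::j_op << 26) | 0x1640090
++// A dst outside the 256 MB region fails the range check and ends up in
++// ShouldNotReachHere().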
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +--- a/src/hotspot/cpu/mips/nativeInst_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
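++ // Note: the reserved opcode used by NativeIllegalInstruction (defined later in
++ // this header) is expected to raise SIGILL when executed, so the platform
++ // signal handler can call is_sigill_zombie_not_entrant() to tell a deliberately
++ // patched (not_entrant/zombie) entry point apart from a genuine crash. The
++ // exact signal-handler hook is presumably in the platform-specific os code and
++ // is not shown in this header.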
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
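++ // Note: relocation processing is one such code-generation caller -- see
++ // Relocation::pd_set_call_destination() in relocInfo_mips.cpp (later in this
++ // patch), which passes assert_lock = false when it retargets a trampoline call
++ // while code is still being generated. Runtime callers (e.g. inline-cache
++ // updates) are expected to keep the default assert_lock = true so the patching
++ // lock assertion stays active.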
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
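++ // Note: "immediate" means the displacement fits the signed 16-bit offset field
++ // of a single MIPS load/store, so the lui/addiu/addu prelude shown above is not
++ // needed and the access is one instruction. For reference, the usual %hi/%lo
++ // style split (the real split_high/split_low definitions live in the assembler
++ // sources, so the formulas here are only illustrative) is:
++ //
++ //   split_low(x)  = x & 0xffff              // sign-extended again by addiu
++ //   split_high(x) = (x + 0x8000) >> 16      // compensates for that sign
++ //
++ //   e.g. x = 0x12348765: split_high = 0x1235, split_low = 0x8765 (= -0x789b),
++ //   and (0x1235 << 16) + (-0x789b) == 0x12348765.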
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
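++// As the accessors below show, the stub is essentially an 8-byte data slot: the
++// destination is read with ptr_at(0) and updated with set_ptr_at(0), while the
++// far-call sequence recognised by is_trampoline_call() builds the stub address
++// (lui/ori/dsll), loads that word with ld and jumps through it with jalr.
++// Retargeting a far call is therefore a single 8-byte store into the stub
++// (assuming the stub is kept 8-byte aligned, which this header does not enforce).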
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +--- a/src/hotspot/cpu/mips/register_definitions_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +--- a/src/hotspot/cpu/mips/registerMap_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +--- a/src/hotspot/cpu/mips/register_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/register_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +--- a/src/hotspot/cpu/mips/register_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/register_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) 
++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define A0 ((Register)(i4_RegisterEnumValue)) ++#define A1 ((Register)(i5_RegisterEnumValue)) ++#define A2 ((Register)(i6_RegisterEnumValue)) ++#define A3 ((Register)(i7_RegisterEnumValue)) ++#define A4 ((Register)(i8_RegisterEnumValue)) ++#define A5 ((Register)(i9_RegisterEnumValue)) ++#define A6 ((Register)(i10_RegisterEnumValue)) ++#define A7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 ((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* 
FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) 
++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +--- a/src/hotspot/cpu/mips/relocInfo_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); 
} ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +--- a/src/hotspot/cpu/mips/relocInfo_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +--- a/src/hotspot/cpu/mips/runtime_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. 
++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +--- a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,3879 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* 
masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ 
sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), 
F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * 
jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ set64(T9, (long)destination); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. 
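Editor's note: the standalone sketch below is not part of the patch. It is a rough model of the register-assignment walk performed by java_calling_convention() just below, using the register tables that function actually defines (T0 and A0-A6 for integer-like values, F12-F19 for floats and doubles, one counter shared by both tables), which are wider than the older comment above suggests. All names in the sketch are hypothetical.

#include <cstdio>

// Simplified stand-ins for HotSpot's BasicType values (illustrative only).
enum ArgKind { kInt, kLong, kFloat, kDouble, kOop, kVoidHalf };

int main() {
  const char* int_regs[] = { "T0", "A0", "A1", "A2", "A3", "A4", "A5", "A6" };
  const char* fp_regs[]  = { "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19" };
  // Hypothetical Java signature (int, long, double, Object); longs and doubles
  // are followed by a VOID half, mirroring sig_bt in the real code.
  ArgKind sig[] = { kInt, kLong, kVoidHalf, kDouble, kVoidHalf, kOop };
  unsigned args = 0, stk_args = 0;  // one counter shared by both tables, as in the patch
  for (ArgKind k : sig) {
    if (k == kVoidHalf) continue;   // second half of a long/double, no slot of its own
    bool fp = (k == kFloat || k == kDouble);
    if (args < 8) {
      printf("arg -> %s\n", fp ? fp_regs[args] : int_regs[args]);
      args++;
    } else {
      printf("arg -> stack slots %u-%u\n", stk_args, stk_args + 1);
      stk_args += 2;
    }
  }
  // Prints: T0, A0, F14, A2. The double lands in F14 because the two integer
  // arguments before it already consumed two positions of the shared counter.
  return 0;
}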
++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. 
If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
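// Editor's worked example (illustrative, not part of the patch): if five
// 4-byte VMRegImpl stack slots are in use and wordSize is 8, the statement
// below computes round_to(5 * 4, 8) >> LogBytesPerWord, i.e. 24 >> 3 == 3
// words; the following round_to(3, 2) bumps that to 4 words so the frame
// stays 16-byte aligned, and SP is then dropped by 4 * wordSize == 32 bytes.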
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
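// Editor's note (illustrative, not from the patch): despite the comment above,
// the body below covers all four source/destination combinations:
//   stack -> stack : ld  AT,  FP, off_in   then   sd  AT,  SP, off_out
//   stack -> reg   : ld  dst, FP, off_in
//   reg   -> stack : sd  src, SP, off_out
//   reg   -> reg   : move dst, src
// which matters on MIPS64, where a long argument can arrive in an A-register
// as well as on the stack.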
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
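// Editor's worked example (illustrative, not part of the patch): for a
// critical native whose register-passed Java arguments are, say,
// (int, byte[], float, double), the counting below yields
//   single_slots == 2   (the int and the float)
//   double_slots == 2   (the array oop and the double)
// so total_save_slots == 2 * 2 + 2 == 6, and because double_slots != 0 the
// running stack_slots total is rounded up to an even slot count.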
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
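Before the IC-check and entry code that follows, here is a minimal standalone sketch of the stack-slot accounting performed above: everything is counted in 32-bit slots and only converted to bytes at the very end. The constants (4-byte slots, 2 slots per 64-bit word, 16-byte stack alignment) and the round_up_slots helper are illustrative assumptions standing in for VMRegImpl, StackAlignmentInSlots and round_to, not authoritative HotSpot definitions.

#include <cstdio>

static const int kSlotSize        = 4;  // bytes per stack slot (assumed)
static const int kSlotsPerWord    = 2;  // one 64-bit word = 2 slots (assumed)
static const int kStackAlignSlots = 4;  // 16-byte alignment / 4-byte slots (assumed)

// Round x up to a multiple of align, the same rounding round_to() performs.
static int round_up_slots(int x, int align) {
  return ((x + align - 1) / align) * align;
}

int main() {
  int out_arg_slots = 10;                    // hypothetical outgoing C-call args
  int stack_slots   = 0 + out_arg_slots;     // out_preserve_stack_slots() is 0 here
  stack_slots += 9 * kSlotsPerWord;          // oop handle area (9 register args)
  stack_slots += kSlotsPerWord;              // klass handle slot (static method)
  stack_slots += kSlotsPerWord;              // lock box (synchronized method)
  stack_slots += 2 + 9 * kSlotsPerWord;      // ra/fp plus result and move temps, as above

  stack_slots = round_up_slots(stack_slots, kStackAlignSlots);
  std::printf("frame size = %d bytes\n", stack_slots * kSlotSize);
  return 0;
}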
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
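The reverse walk in the shuffle below matters: for a regular JNI native every outgoing position sits one or two places above its incoming position (because of the inserted JNIEnv* and, for static methods, the class mirror), so copying from the highest index downward never overwrites a source that has not been read yet. A minimal model of that overlap-safe copy, using a flat array in place of the real registers and VMRegPair vectors (the layout and the shift of 2 are illustrative assumptions):

#include <cstdio>

int main() {
  // Model: both conventions pass one value per position, and the C convention
  // shifts every argument up by two positions (JNIEnv*, class mirror).
  int pos[8] = { 11, 22, 33, 44, 0, 0, 0, 0 };  // java args occupy positions 0..3
  const int total_in_args = 4;
  const int shift = 2;                          // hypothetical offset

  // Walk from the last argument down, like the arg_order loop below: by the
  // time pos[i + shift] is written, it has already been read (it is only a
  // source for a larger index, which this loop visits first).
  for (int i = total_in_args - 1; i >= 0; i--) {
    pos[i + shift] = pos[i];
  }

  for (int i = 0; i < 8; i++) {
    std::printf("position %d = %d\n", i, pos[i]);
  }
  return 0;
}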
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. 
++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // 
Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
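A worked example of the signature conversion the loop below performs, applied to one hypothetical probe signature. The enum and names here are local stand-ins, not HotSpot's BasicType, and the Java-side T_VOID filler slots for longs/doubles are omitted for brevity:

#include <cstdio>
#include <vector>

// Local stand-ins for HotSpot's BasicType values.
enum BT { BT_INT, BT_LONG, BT_FLOAT, BT_DOUBLE, BT_STRING, BT_BOXED_LONG,
          BT_ADDRESS, BT_VOID };

int main() {
  // Hypothetical probe: void probe(String s, int i, float f, double d, java.lang.Long box)
  const BT java_sig[] = { BT_STRING, BT_INT, BT_FLOAT, BT_DOUBLE, BT_BOXED_LONG };
  std::vector<BT> c_sig;

  for (BT bt : java_sig) {
    switch (bt) {
      case BT_STRING:     c_sig.push_back(BT_ADDRESS); break;  // converted to a utf8 char*
      case BT_FLOAT:      c_sig.push_back(BT_INT);     break;  // floats are widened to int
      case BT_DOUBLE:                                          // doubles and boxed Longs both
      case BT_BOXED_LONG: c_sig.push_back(BT_LONG);            // become a two-slot long
                          c_sig.push_back(BT_VOID);    break;
      default:            c_sig.push_back(bt);         break;  // other primitives pass through
    }
  }
  std::printf("dtrace sees %zu C argument slots\n", c_sig.size());
  return 0;
}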
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
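What the frame-replacement loop below does, sketched as plain C++ over a simplified UnrollBlock. The struct, field names and units are stand-ins chosen for illustration, not the real Deoptimization::UnrollBlock layout:

#include <cstdint>
#include <cstdio>

// Simplified stand-in for the unroll information (names/units illustrative).
struct FakeUnrollBlock {
  int             size_of_deoptimized_frame;  // bytes to pop
  int             caller_adjustment;          // extra bytes for the caller's locals
  int             number_of_frames;           // skeletal interpreter frames to push
  const int*      frame_sizes;                // bytes per new frame
  const uint64_t* frame_pcs;                  // return pc per new frame (+1 final entry)
};

static void replace_frames(uint64_t& sp, const FakeUnrollBlock& ub) {
  sp += ub.size_of_deoptimized_frame;         // pop the deoptimized compiled frame
  sp += sizeof(uint64_t);                     // trash the old return pc
  sp -= ub.caller_adjustment;                 // extend the caller for extra locals

  for (int i = 0; i < ub.number_of_frames; i++) {
    sp -= 2 * sizeof(uint64_t);               // push return pc and saved fp by hand
    sp -= ub.frame_sizes[i] - 2 * sizeof(uint64_t);  // reserve the rest of the frame
    std::printf("frame %d: pc=%#llx sp=%#llx\n",
                i, (unsigned long long)ub.frame_pcs[i], (unsigned long long)sp);
  }
  // frame_pcs[number_of_frames] is where the re-pushed self-frame will return to.
}

int main() {
  const int      sizes[] = { 96, 112 };
  const uint64_t pcs[]   = { 0x1000, 0x2000, 0x3000 };
  FakeUnrollBlock ub = { 512, 16, 2, sizes, pcs };
  uint64_t sp = 0x7fff0000;
  replace_frames(sp, ub);
  return 0;
}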
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
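One detail worth spelling out in the blob above: the self-frame is described by the small frame_layout enum (two BytesPerInt slots for the saved FP, two for the return address), so assert(framesize % 4 == 0, "sp not 16-byte aligned") is really a 16-byte stack-alignment check. A throwaway snippet makes the arithmetic explicit (illustrative only, BytesPerInt assumed to be 4):

  // Same shape as the frame_layout enum above: framesize == 4 slots of 4 bytes each.
  enum sketch_frame_layout { s_fp_off, s_fp_off2, s_return_off, s_return_off2, s_framesize };
  static_assert((s_framesize * 4) % 16 == 0, "self-frame keeps SP 16-byte aligned");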
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? 
++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
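MACC and MACC2 above maintain a 192-bit running sum in (t0, t1, t2) using dmultu/mfhi/mflo and hand-written carry propagation. For checking the carry logic, here is an equivalent portable sketch (it relies on GCC/Clang's unsigned __int128 and is not part of the patch):

  #include <cstdint>

  // (t2:t1:t0) += A * B, carries propagated exactly as in the inline asm above.
  static inline void macc_sketch(uint64_t A, uint64_t B,
                                 uint64_t& t0, uint64_t& t1, uint64_t& t2) {
    unsigned __int128 p = (unsigned __int128)A * B;
    uint64_t lo = (uint64_t)p;
    uint64_t hi = (uint64_t)(p >> 64);
    uint64_t s  = t0 + lo;
    uint64_t c  = (s < lo);            // carry out of the low word
    t0 = s;
    uint64_t m  = t1 + c;
    uint64_t c2 = (m < c);             // carry from adding that carry
    m  += hi;
    c2 |= (m < hi);                    // carry from adding the high product word
    t1 = m;
    t2 += c2;                          // at most one carry ever reaches the top word
  }

MACC2 is the same accumulation applied twice, which is why its asm body simply repeats the add/carry sequence. The montgomery_square variant introduced in the comment above feeds the same accumulator.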
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
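Taking the 512-jint example from these comments: longwords = 512 / 2 = 256, so montgomery_multiply's four scratch arrays occupy 4 * 256 * 8 = 8192 bytes and montgomery_square's three occupy 3 * 256 * 8 = 6144 bytes, which is where the 8k and 6k figures (and the guarantee(total_allocation <= 8192) bounds in both wrappers) come from.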
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +--- a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,2162 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
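The two stack diagrams above also pin down the C-to-Java calling convention: a0..a7 carry the call wrapper, result slot, result type, Method*, entry point, parameter array, parameter count and thread, while the callee-saved registers and GP are parked in the negative-offset slots of the new frame. Seen from the C++ side, the stub is therefore entered through a function pointer of roughly this shape (parameter names taken from the diagram; compare the shared StubRoutines::CallStub typedef):

  // Sketch of the call stub's C++-visible shape; not the literal HotSpot declaration.
  typedef void (*call_stub_sketch_t)(
      address    call_wrapper,     // a0: link back to the JavaCallWrapper
      intptr_t*  result,           // a1: where the Java return value is stored
      BasicType  result_type,      // a2: T_INT / T_LONG / T_FLOAT / T_DOUBLE / ...
      Method*    method,           // a3: callee, ends up in Rmethod
      address    entry_point,      // a4: interpreter or compiled entry, reached via jalr A4
      intptr_t*  parameters,       // a5: Java arguments, copied onto the expression stack
      int        parameter_size,   // a6: number of argument words
      Thread*    thread);          // a7: current thread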
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, 
FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
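In other words, the stub turns a pending exception left by a runtime or native call stub into the Java-level contract stated above (V0 = exception oop, V1 = throwing pc, handler in T9). An illustrative control-flow sketch, with HotSpot types assumed and the register assignments noted in comments:

  // Illustrative only; mirrors the stub's control flow, not real HotSpot code.
  static void forward_exception_sketch(JavaThread* thread, address* sp) {
    address throwing_pc = *sp;                    // __ ld(A1, SP, 0), later popped into V1
    address handler =
        SharedRuntime::exception_handler_for_return_address(thread, throwing_pc);  // -> T9
    oop exception = thread->pending_exception();  // -> V0
    thread->clear_pending_exception();            // __ sd(R0, thread, pending_exception_offset)
    (void)exception;
    // __ jr(T9): resume at 'handler' with V0 and V1 carrying the contract above
  }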
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
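That closing overlapping store is safe because arrays shorter than 8 bytes were dispatched to L_fill_elements up front, so the word written at the very end only re-writes bytes that are already filled. The value it stores was widened earlier with dins/dinsu; in portable terms the widening is:

  #include <cstdint>

  // Widening of the fill value as done with dins/dinsu above (sketch); assumes
  // 'value' is zero-extended to the element width on entry.
  static inline uint64_t replicate_fill_value(uint64_t value, int log2_elem_size) {
    if (log2_elem_size == 0) value |= value << 8;   //  8 -> 16 bits (dins  8, 8)
    if (log2_elem_size <= 1) value |= value << 16;  // 16 -> 32 bits (dins 16, 16)
    value |= value << 32;                           // 32 -> 64 bits (dinsu 32, 32)
    return value;
  }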
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
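The disjoint byte copy that ends just above is an alignment ladder: peel 1, 2 and then 4 bytes until source and destination are co-aligned to 8 bytes (falling back to narrower loops when they can never be co-aligned), stream 8 bytes at a time, and finish with 4-, 2- and 1-byte tails. A simplified sketch of that strategy in plain C++ (byte-granule peel and tail only; not a replacement for the stub):

  #include <cstdint>
  #include <cstddef>
  #include <cstring>

  static void disjoint_byte_copy_sketch(const uint8_t* from, uint8_t* to, size_t count) {
    if (count > 9 && ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0)) {
      while (((uintptr_t)from & 7) != 0) {       // peel until 8-byte co-alignment
        *to++ = *from++; --count;
      }
      while (count >= 8) {                       // 8-byte ld/sd loop (l_8 in the stub)
        uint64_t w;
        std::memcpy(&w, from, sizeof(w));
        std::memcpy(to, &w, sizeof(w));
        from += 8; to += 8; count -= 8;
      }
    }
    while (count-- > 0) {                        // byte tail (l_9/l_11 in the stub)
      *to++ = *from++;
    }
  }

The conjoint variant that follows walks the same ladder from the high addresses downwards, which is why it first computes end_from/end_to.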
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
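Because source and destination may overlap in the conjoint case, array_overlap_test above either branches to the matching disjoint (forward) stub or falls through to this high-to-low copy; the l_from_unaligned step just below then peels single trailing elements until the high end is 4-byte aligned. A scalar model of that overlap decision, purely for illustration:

    #include <cstddef>
    #include <cstdint>

    // memmove-style direction choice for 2-byte elements: forward copy is safe
    // when the destination does not start inside the source range; otherwise
    // copy backwards so each source element is read before it is overwritten.
    void conjoint_copy_u16(const uint16_t* from, uint16_t* to, size_t count) {
      uintptr_t f = reinterpret_cast<uintptr_t>(from);
      uintptr_t t = reinterpret_cast<uintptr_t>(to);
      if (t <= f || t >= f + count * sizeof(uint16_t)) {
        for (size_t i = 0; i < count; i++) to[i] = from[i];         // disjoint path
      } else {
        for (size_t i = count; i > 0; i--) to[i - 1] = from[i - 1]; // backward path
      }
    }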
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
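As the comment block in generate_safefetch above spells out, SafeFetch32/SafeFetchN read from an address that may be unmapped: the load at *fault_pc may trap, in which case the VM's signal handler resumes execution at *continuation_pc with the destination register still holding errValue, so the caller sees errValue instead of a crash. A hedged C++ model of that contract (a plain function cannot survive a real fault; this only documents the semantics and is not the VM implementation):

    #include <cstdio>

    // Conceptual behaviour of the SafeFetch32 stub: return *adr, or errValue
    // if reading *adr would fault. The 'would_fault' flag stands in for the
    // signal-handler machinery that the real stub relies on.
    int SafeFetch32_model(const int* adr, int errValue, bool would_fault) {
      return would_fault ? errValue : *adr;
    }

    int main() {
      int x = 42;
      printf("%d\n", SafeFetch32_model(&x, -1, false));     // 42
      printf("%d\n", SafeFetch32_model(nullptr, -1, true)); // -1 instead of SIGSEGV
      return 0;
    }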
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +--- a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +--- a/src/hotspot/cpu/mips/stubRoutines_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +--- a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. 
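The result handlers generated a little further up normalize the raw value returned in V0 for each BasicType before the interpreter consumes it. A compact model of those conversions (the type tags and function name here are illustrative, not HotSpot API):

    #include <cstdint>

    // Booleans are canonicalized to 0/1, chars are zero-extended to 16 bits,
    // bytes and shorts are sign-extended; ints, longs, floats, doubles and
    // oops pass through unchanged (oops are additionally reloaded and verified).
    intptr_t normalize_result(intptr_t raw, char type_tag) {
      switch (type_tag) {
        case 'Z': return raw != 0;                 // T_BOOLEAN: c2bool
        case 'C': return raw & 0xFFFF;             // T_CHAR:    andi 0xFFFF
        case 'B': return (int8_t)raw;              // T_BYTE:    sign_extend_byte
        case 'S': return (int16_t)raw;             // T_SHORT:   sign_extend_short
        default:  return raw;                      // everything else
      }
    }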
++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
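The limit computed above amounts to: the new frame fits only if SP stays above stack_base - stack_size plus the space for the extra locals, the fixed frame overhead, and the guard/shadow zones that will be banged; when it does not, the code below restores the sender SP and jumps to the shared StackOverflowError stub. A standalone sketch of that inequality (parameter names are mine):

    #include <cstddef>
    #include <cstdint>

    // Returns true when the prospective frame bottom stays above the guard
    // pages, mirroring "slt AT, T3, SP; bne AT, R0, after_frame_check".
    bool frame_fits(uintptr_t sp, uintptr_t stack_base, size_t stack_size,
                    size_t extra_locals, size_t element_size,
                    size_t overhead_bytes, size_t bang_bytes) {
      uintptr_t limit = stack_base - stack_size      // lowest usable address
                      + extra_locals * element_size  // space for additional locals
                      + overhead_bytes               // fixed interpreter overhead
                      + bang_bytes;                  // guard + shadow zone sizes
      return limit < sp;
    }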
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
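// Editor's note: illustrative sketch only, not part of the original patch. It
// outlines the thread-state hand-shake the surrounding code performs around the
// native call; the GCC atomic builtins stand in for the sync()/sw pairs, and the
// callback parameters are hypothetical.
enum NativeCallState { state_in_Java, state_in_native, state_in_native_trans };

void native_call_protocol(int* state, void (*native_fn)(void),
                          int (*safepoint_or_suspend_pending)(void),
                          void (*block_for_vm)(void)) {
  __atomic_store_n(state, state_in_native, __ATOMIC_RELEASE);   // publish before running native code
  native_fn();                                                  // the jalr(T9) above
  __atomic_store_n(state, state_in_native_trans, __ATOMIC_RELEASE);
  __atomic_thread_fence(__ATOMIC_SEQ_CST);                      // sync() or the serialization page write
  if (safepoint_or_suspend_pending()) {
    block_for_vm();                                             // check_special_condition_for_native_trans
  }
  __atomic_store_n(state, state_in_Java, __ATOMIC_RELEASE);
}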
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. 
For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. 
++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. 
++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif ++ __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ move(AT, JvmtiThreadState::earlyret_inactive); ++ __ sw(AT, cond_addr); ++ __ sync(); ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ sync(); ++ __ jr(T0); ++ __ delayed()->nop(); ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); ++ dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); ++ lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); ++ aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++/* ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++*/ ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but mips o32 call convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. 
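// Editor's note (not part of the original patch): restated, the two loads below
// pass the top two expression-stack words to the tracer explicitly in A2/A3
// rather than letting the calling convention look for them on the stack, which
// is not reliable when the expression stack holds fewer than two values.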
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +--- a/src/hotspot/cpu/mips/templateTable_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,4688 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
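// Editor's note (not part of the original patch): "pair bytecodes" refers to
// callers such as iload_internal() further down, which pass
// load_bc_into_bc_reg == false because bc_reg already holds the quickened
// bytecode they selected.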
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
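// Editor's note: illustrative sketch only, not part of the original patch; the
// enum and strings are hypothetical. It summarizes the tag dispatch implemented
// by this chain of daddiu/beq checks.
enum LdcTag { tag_UnresolvedClass, tag_UnresolvedClassInError, tag_Class,
              tag_Float, tag_Integer, tag_Other };

const char* ldc_action(LdcTag tag) {
  switch (tag) {
    case tag_UnresolvedClass:
    case tag_UnresolvedClassInError:
    case tag_Class:   return "call InterpreterRuntime::ldc, push the returned mirror (atos)";
    case tag_Float:   return "load the constant-pool slot, push ftos";
    case tag_Integer: return "load the constant-pool slot, push itos";
    default:          return "assume a dynamic constant; condy_helper handles it";
  }
}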
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
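// Editor's note: illustrative sketch only, not part of the original patch; names
// are hypothetical. The andi above and the dsrl below unpack the flags word
// exactly like this: the low bits carry the field offset, the bits at
// tos_state_shift carry the type to push.
struct CondyFlags { unsigned tos_state; unsigned field_offset; };

CondyFlags decode_condy_flags(unsigned flags, unsigned tos_state_shift,
                              unsigned field_index_mask) {
  CondyFlags f;
  f.field_offset = flags & field_index_mask;  // andi(off, flags, field_index_mask)
  f.tos_state    = flags >> tos_state_shift;  // dsrl(flags, flags, tos_state_shift)
  return f;
}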
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
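// Editor's note: illustrative sketch only, not part of the original patch. The
// check emitted below maps the resolved-reference "null sentinel" back to a real
// NULL, since a genuinely null constant cannot be stored in the cache directly.
const void* map_null_sentinel(const void* resolved, const void* null_sentinel) {
  return (resolved == null_sentinel) ? (const void*)0 : resolved;
}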
++ Label notNull; ++ __ set64(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ delayed()->nop(); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ ++ // dtos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
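++    // Neither a second iload nor a caload follows, so this is a lone (or
++    // trailing) iload: rewriting it to _fast_iload avoids repeating the
++    // pair check on the next execution.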
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
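++// index_check below relies on a single unsigned compare for both bounds:
++// a negative index, viewed as unsigned, is larger than any array length,
++// so (illustrative C only)
++//   if ((juint)index >= (juint)array->length())
++//     goto throw_ArrayIndexOutOfBoundsException;
++// covers index < 0 as well as index >= length.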
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
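++  // Note: wide_fstore and wide_dstore below simply delegate to
++  // wide_istore and wide_lstore, since the interpreter stores locals as
++  // raw 64-bit slot contents regardless of type in this port.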
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
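++    // Klass::layout_helper encodes the array element type;
++    // layout_helper_boolean_diffbit() is a one-bit mask that distinguishes
++    // the boolean[] layout helper from the byte[] one. When the bit is set
++    // the array is a boolean[] and only the lowest bit of the value is
++    // stored (the andi with 0x1 below).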
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
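++        // Result saturated and the value is negative or NaN: form
++        // min_jlong as -1 - max_jlong, then let the movt below force a
++        // NaN input to zero, matching Java d2l semantics.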
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
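++  // At this point A7 holds the signed branch displacement: for short
++  // branches, the sign-extended high operand byte shifted left by 8 and
++  // or'ed with the zero-extended low byte; for wide branches, the 4-byte
++  // operand converted to native byte order.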
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
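++        // Prefer the MDO backedge counter when a MethodData* is present;
++        // otherwise fall through to the MethodCounters backedge counter.
++        // increment_mask_and_jump branches to backedge_counter_overflow
++        // each time the masked count wraps to zero, i.e. every
++        // 2^Tier0BackedgeNotifyFreqLog backedges.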
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, 
vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table 
entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
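++  // Loop postcondition: a[i].match <= key < a[i+1].match, or key is below
++  // every entry. i is therefore only a candidate: re-compare a[i].match
++  // with key and take the default offset on a mismatch.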
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. 
It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? ++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ 
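To restate the barrier policy from the JMM comment above volatile_barrier(): a barrier is needed before a volatile store, after a volatile store (this one also covers the volatile-store-then-volatile-load case), and after a volatile load. The following C++ sketch mirrors only that placement; the relaxed std::atomic accesses and seq_cst fences are stand-ins chosen for the illustration, not how the interpreter actually implements volatile fields:

#include <atomic>
#include <cstdint>

// A shared slot standing in for a Java volatile field (assumption of this sketch).
static std::atomic<int64_t> g_slot{0};

static int64_t volatile_load() {
  int64_t v = g_slot.load(std::memory_order_relaxed);
  // Barrier after a volatile load: later accesses may not float above it.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return v;
}

static void volatile_store(int64_t v) {
  // Barrier before a volatile store: earlier accesses may not float below it.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  g_slot.store(v, std::memory_order_relaxed);
  // Extra barrier after the store covers the volatile-store -> volatile-load case.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}

int main() {
  volatile_store(42);
  return volatile_load() == 42 ? 0 : 1;
}

In the code shown here the port issues the same full sync for every placement, which is why volatile_barrier() has only one flavor.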
assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, 
noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
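The assembly that follows works out where that object is from the value's type: a long or double occupies two expression-stack slots, everything else one. The same computation in plain C++, with the slot size taken as 8 bytes purely as an assumption of this sketch (the interpreter uses Interpreter::expr_offset_in_bytes for the real offsets):

#include <cstddef>
#include <cstdint>

// Assumed for this sketch only: one interpreter expression-stack slot is
// 8 bytes wide; longs and doubles take two slots, every other value one.
static const size_t kSlotBytes = 8;

enum class Tos { btos, ztos, ctos, stos, itos, ftos, atos, ltos, dtos };

// The value sits on top of the stack and the receiver object right under it,
// so the object's address is sp plus one or two slots depending on the type.
static const uint8_t* object_slot(const uint8_t* sp, Tos tos) {
  size_t value_slots = (tos == Tos::ltos || tos == Tos::dtos) ? 2 : 1;
  return sp + value_slots * kSlotBytes;
}

int main() {
  uint8_t stack[32] = {};
  bool ok = object_slot(stack, Tos::itos) == stack + 8 &&
            object_slot(stack, Tos::dtos) == stack + 16;
  return ok ? 0 : 1;
}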
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ 
pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 
++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ 
__ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case 
Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
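Concretely, the checks that follow branch on two bits of the cached flags word: is_forced_virtual marks the java.lang.Object methods reached through invokeinterface, and is_vfinal marks private interface methods that resolve to a single final target; everything else takes the itable walk. A compact sketch of that three-way decision, with the shift positions written as placeholders rather than the real ConstantPoolCacheEntry constants:

#include <cstdint>
#include <cstdio>

// Illustrative shift positions only; the real values come from
// ConstantPoolCacheEntry and may differ.
static const int kForcedVirtualShift = 23;
static const int kVFinalShift        = 21;

enum class InterfaceDispatch {
  ObjectVirtual,   // invokeinterface on a java.lang.Object method
  PrivateDirect,   // private interface method, resolved to one final target
  ItableLookup     // regular interface method, needs the itable walk
};

static InterfaceDispatch classify(uint32_t flags) {
  if (flags & (1u << kForcedVirtualShift)) return InterfaceDispatch::ObjectVirtual;
  if (flags & (1u << kVFinalShift))        return InterfaceDispatch::PrivateDirect;
  return InterfaceDispatch::ItableLookup;
}

int main() {
  std::printf("%d\n", (int)classify(1u << kVFinalShift));  // prints 1 (PrivateDirect)
  return 0;
}

Only the last case needs the receiver-class and interface lookup_interface_method() path further below.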
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
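Taken together, the allocation path above is a bump-pointer scheme: bump the thread-local buffer if the object fits, otherwise try to CAS the shared eden top (retrying while other threads race), and fall back to the slow path when even that fails. A self-contained sketch of the same idea; the Tlab struct, the eden variables and the byte-based sizes are assumptions of this sketch, not HotSpot's data structures:

#include <atomic>
#include <cstddef>
#include <cstdint>

// Hypothetical thread-local allocation buffer for this sketch.
struct Tlab {
  uint8_t* top;
  uint8_t* end;
};

// Shared "eden" top/end, normally owned by the heap.
static std::atomic<uint8_t*> eden_top{nullptr};
static uint8_t*              eden_end = nullptr;

// 1) bump the TLAB pointer if the object fits,
// 2) otherwise CAS the shared eden top, retrying on contention,
// 3) otherwise report failure so the caller can take the slow path.
static void* allocate(Tlab& tlab, size_t size) {
  if (size <= static_cast<size_t>(tlab.end - tlab.top)) {   // TLAB fast path
    void* obj = tlab.top;
    tlab.top += size;
    return obj;
  }
  uint8_t* old_top = eden_top.load(std::memory_order_relaxed);
  for (;;) {                                                // shared eden, CAS retry loop
    if (size > static_cast<size_t>(eden_end - old_top)) {
      return nullptr;                                       // caller takes the slow path
    }
    uint8_t* new_top = old_top + size;
    if (eden_top.compare_exchange_weak(old_top, new_top,
                                       std::memory_order_relaxed)) {
      return old_top;                                       // we own [old_top, new_top)
    }
    // compare_exchange_weak refreshed old_top with the current value; retry.
  }
}

int main() {
  static uint8_t eden[1024];
  static uint8_t tlab_buf[32];
  eden_top.store(eden);
  eden_end = eden + sizeof(eden);
  Tlab tlab{tlab_buf, tlab_buf + sizeof(tlab_buf)};
  void* a = allocate(tlab, 16);   // fits in the TLAB
  void* b = allocate(tlab, 64);   // too big for what is left, goes to shared eden
  return (a != nullptr && b != nullptr) ? 0 : 1;
}

The cmpxchg retry loop in the interpreter plays the role of compare_exchange_weak here: whichever thread installs the new top owns the bytes between the old and the new value.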
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ movz(c_rarg0, T2, AT); ++ ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. 
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +--- a/src/hotspot/cpu/mips/templateTable_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +--- a/src/hotspot/cpu/mips/vmreg_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +--- a/src/hotspot/cpu/mips/vmreg_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +--- a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +--- a/src/hotspot/cpu/mips/vmStructs_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +--- a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +--- a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +--- a/src/hotspot/cpu/mips/vm_version_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't 
detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); 
++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the 
_features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +--- a/src/hotspot/cpu/mips/vm_version_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize 
Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +--- a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,340 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
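The comments above describe why the stub generators track "slop": every variable-length instruction sequence is emitted against an assumed worst-case size, and the unused remainder is accumulated so the final bookkeeping can confirm the stub still fits its fixed-size buffer. A minimal stand-alone sketch of that accounting pattern, with invented names rather than the real MacroAssembler API, might look like this:

    #include <cassert>
    #include <cstdio>

    struct SlopTracker {
      int slop_bytes = 0;
      // 'estimate' is the assumed worst-case size of a sequence, 'emitted' the
      // size actually produced; the difference must never be negative.
      void account(int estimate, int emitted) {
        int slop_delta = estimate - emitted;
        assert(slop_delta >= 0 && "code size estimate too small, enlarge it");
        slop_bytes += slop_delta;
      }
    };

    int main() {
      SlopTracker t;
      t.account(6 * 4, 5 * 4);    // e.g. a constant load assumed at 6 instructions, 5 emitted
      t.account(18 * 4, 16 * 4);  // e.g. a virtual-method lookup assumed at 18 instructions
      std::printf("unused bytes carried forward: %d\n", t.slop_bytes);
      return 0;
    }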
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. 
++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
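The two scan loops above implement the same search in assembly: walk the klass' itable offset entries until either a null interface is hit (not implemented, jump to L_no_such_interface) or the wanted interface is found, whose entry then supplies the offset of the corresponding method array. A compact C++ restatement of that search, using invented stand-in types rather than the real itable classes, is sketched here.

    #include <cstdio>

    struct InterfaceKlass {};  // stand-in for a resolved interface klass
    struct ItableOffsetEntry { const InterfaceKlass* interface; int offset; };

    // Returns the method-table offset for 'wanted', or -1 when the receiver's
    // class does not implement it (the L_no_such_interface case above).
    static int find_itable_offset(const ItableOffsetEntry* entries, const InterfaceKlass* wanted) {
      for (const ItableOffsetEntry* e = entries; e->interface != nullptr; ++e) {
        if (e->interface == wanted) return e->offset;
      }
      return -1;
    }

    int main() {
      InterfaceKlass comparable, runnable, serializable;
      ItableOffsetEntry itable[] = { { &comparable, 128 }, { &runnable, 160 }, { nullptr, 0 } };
      std::printf("%d %d\n", find_itable_offset(itable, &runnable),
                             find_itable_offset(itable, &serializable));
      return 0;
    }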
++ start_pc = __ pc(); ++ __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp 2024-01-30 10:00:11.851431611 +0800 +@@ -488,6 +488,9 @@ + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1608,6 +1611,10 @@ + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp 2024-01-30 10:00:11.851431611 +0800 +@@ -273,21 +273,29 @@ + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, T_INT), tmp, info); +- __ cmp(condition, tmp, c); ++ __ cmp_branch(condition, tmp, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, +- int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, type), tmp, info); +- __ cmp(condition, reg, tmp); ++ __ cmp_branch(condition, reg, tmp, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +--- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp 2024-01-30 10:00:11.851431611 +0800 +@@ -62,3 +62,24 @@ + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp 2024-01-30 10:00:11.861431492 +0800 +@@ -379,6 +379,9 @@ + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1503,6 +1506,10 @@ + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp 2024-01-30 10:00:11.861431492 +0800 +@@ -213,16 +213,29 @@ + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void 
LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); +- __ cmp(condition, scratch, c); ++ __ cmp_branch(condition, scratch, c, T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +--- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp 2024-01-30 10:00:11.861431492 +0800 +@@ -56,3 +56,23 @@ + } + #endif // PRODUCT + ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp 2024-01-30 10:00:11.878097961 +0800 +@@ -599,6 +599,9 @@ + // The peephole pass fills the delay slot + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1638,6 +1641,9 @@ + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp 2024-01-30 10:00:11.878097961 +0800 +@@ -267,19 +267,29 @@ + __ move(temp, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); +- __ cmp(condition, o7opr, c); ++ __ cmp_branch(condition, o7opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); +- __ cmp(condition, reg, o7opr); ++ __ cmp_branch(condition, reg, o7opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +--- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp 2024-01-30 10:00:11.878097961 +0800 +@@ -54,3 +54,24 @@ + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp 2024-01-30 10:00:11.888097840 +0800 +@@ -1442,6 +1442,10 @@ + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2030,6 +2034,9 @@ + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp 2024-01-30 10:00:11.888097840 +0800 +@@ -255,15 +255,27 @@ + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +--- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp 2024-01-30 10:00:11.888097840 +0800 +@@ -72,3 +72,24 @@ + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp 2024-01-30 10:00:11.891431134 +0800 +@@ -263,7 +263,8 @@ + #define __ ce->masm()-> + + void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); + } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp 2024-01-30 10:00:11.891431134 +0800 +@@ -77,7 +77,8 @@ + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +--- a/src/hotspot/os/linux/os_linux.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/os/linux/os_linux.cpp 2024-01-30 10:00:11.914764190 +0800 +@@ -23,6 +23,12 @@ + * + */ 
+ ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -4076,6 +4082,8 @@ + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) ++ MIPS64_ONLY(4 * M) ++ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA + SPARC_ONLY(4 * M); + #endif // ZERO + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +--- 
a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +--- a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ 
// stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). 
++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. 
++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. 
++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_LOONGARCH_LAM ++#define HWCAP_LOONGARCH_LAM (1 << 1) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_UAL ++#define HWCAP_LOONGARCH_UAL (1 << 2) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_COMPLEX ++#define HWCAP_LOONGARCH_COMPLEX (1 << 7) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_CRYPTO ++#define HWCAP_LOONGARCH_CRYPTO (1 << 8) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_X86 ++#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_ARM ++#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_MIPS ++#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#endif ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ STATIC_ASSERT(CPU_LAM == HWCAP_LOONGARCH_LAM); ++ STATIC_ASSERT(CPU_UAL == HWCAP_LOONGARCH_UAL); ++ STATIC_ASSERT(CPU_LSX == HWCAP_LOONGARCH_LSX); ++ STATIC_ASSERT(CPU_LASX == HWCAP_LOONGARCH_LASX); ++ STATIC_ASSERT(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX); ++ STATIC_ASSERT(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO); ++ STATIC_ASSERT(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86); ++ STATIC_ASSERT(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM); ++ STATIC_ASSERT(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS); ++ ++ _features = auxv & ( ++ HWCAP_LOONGARCH_LAM | ++ HWCAP_LOONGARCH_UAL | ++ HWCAP_LOONGARCH_LSX | ++ HWCAP_LOONGARCH_LASX | ++ HWCAP_LOONGARCH_COMPLEX | ++ HWCAP_LOONGARCH_CRYPTO | ++ HWCAP_LOONGARCH_LBT_X86 | ++ HWCAP_LOONGARCH_LBT_ARM | ++ HWCAP_LOONGARCH_LBT_MIPS); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == 
sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. ++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +--- a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +--- a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp 1970-01-01 
08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. ++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s +--- a/src/hotspot/os_cpu/linux_mips/linux_mips.s 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). 
++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,1020 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. 
++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = 
(inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. 
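++  // In outline: if the faulting pc matches the fault address (or the instruction
++  // spans into a non-executable page), the code below remaps the page RWX and
++  // retries the faulting instruction.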
++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++Untested("Unimplemented yet"); ++ bool instr_spans_page_boundary = ++/* ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++*/ ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++Untested("Unimplemented yet"); ++/* ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++*/ ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. 
++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++//size_t os::Linux::min_stack_allowed = 96 * K; ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++ ++/* ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. 
Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++*/ ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp 
- 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // MIPS does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +--- a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. 
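++      // The sample is simply dropped; callers treat a false return as
++      // "no walkable frame" for this tick.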
++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). 
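++  // MIPS has no separate register stack, so these are no-ops that never
++  // report an overflow.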
++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
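++// For linux_mips only the OSThread thread ids (_thread_id, _pthread_id) are
++// exposed in the macros below.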
++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +--- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp 2024-01-30 10:00:11.934763950 +0800 +@@ -85,4 +85,6 @@ + + const size_t ZPlatformCacheLineSize = 64; + ++const bool ZPlatformLoadBarrierTestResultInRegister = false; ++ + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +--- a/src/hotspot/share/asm/codeBuffer.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/asm/codeBuffer.cpp 2024-01-30 10:00:11.944763831 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -351,6 +357,7 @@ + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp +--- a/src/hotspot/share/c1/c1_Compiler.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_Compiler.cpp 2024-01-30 10:00:11.944763831 +0800 +@@ -44,6 +44,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -211,7 +217,7 @@ + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: +-#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) ++#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + #endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +--- a/src/hotspot/share/c1/c1_LinearScan.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1258,6 +1264,23 @@ + } + break; + } ++ case lir_cmp_cmove: { ++ assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++ ++ LIR_Opr move_from = cmove->in_opr3(); ++ LIR_Opr move_to = cmove->result_opr(); ++ ++ if (move_to->is_register() && move_from->is_register()) { ++ Interval* from = interval_at(reg_num(move_from)); ++ Interval* to = interval_at(reg_num(move_to)); ++ if (from != NULL && to != NULL) { ++ to->set_register_hint(from); ++ TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); ++ } ++ } ++ break; ++ } + default: + break; + } +@@ -3350,7 +3373,9 @@ + check_live = (move->patch_code() == lir_patch_none); + } + LIR_OpBranch* branch = op->as_OpBranch(); +- if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { ++ LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); ++ if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || ++ (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { + // Don't bother checking the stub in this case since the + // exception stub will never return to normal control flow. + check_live = false; +@@ -6206,6 +6231,16 @@ + if (branch->ublock() == target_from) { + branch->change_ublock(target_to); + } ++ } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { ++ assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; ++ ++ if (branch->block() == target_from) { ++ branch->change_block(target_to); ++ } ++ if (branch->ublock() == target_from) { ++ branch->change_ublock(target_to); ++ } + } + } + } +@@ -6328,6 +6363,20 @@ + } + } + } ++ } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { ++ assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; ++ ++ if (prev_branch->stub() == NULL) { ++ if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { ++ TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); ++ ++ // eliminate a conditional branch to the immediate successor ++ prev_branch->change_block(last_branch->block()); ++ prev_branch->negate_cond(); ++ instructions->trunc_to(instructions->length() - 1); ++ } ++ } + } + } + } +@@ -6403,6 +6452,13 @@ + assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); + assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); + } ++ ++ LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); ++ ++ if (op_cmp_branch != NULL) { ++ assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); ++ assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); ++ } + } + + for (j = 0; j < block->number_of_sux() - 1; j++) { +@@ -6647,6 +6703,24 @@ + break; + } + ++ case lir_cmp_branch: ++ case lir_cmp_float_branch: { ++ LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); ++ if (branch->block() == NULL) { ++ inc_counter(counter_stub_branch); ++ } else { ++ 
inc_counter(counter_cond_branch); ++ } ++ inc_counter(counter_cmp); ++ break; ++ } ++ ++ case lir_cmp_cmove: { ++ inc_counter(counter_misc_inst); ++ inc_counter(counter_cmp); ++ break; ++ } ++ + case lir_neg: + case lir_add: + case lir_sub: +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -777,6 +777,18 @@ + } + + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch (op->code()) { ++ case lir_cmp_cmove: ++ cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++ + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp 2024-01-30 10:00:11.948097125 +0800 +@@ -190,7 +190,9 @@ + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); ++ void emit_opCmpBranch(LIR_OpCmpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); + void emit_updatecrc32(LIR_OpUpdateCRC32* op); +@@ -223,6 +225,7 @@ + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); ++ void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +--- a/src/hotspot/share/c1/c1_LIR.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIR.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -250,6 +250,18 @@ + #endif + } + ++void LIR_Op4::verify() const { ++#ifdef ASSERT ++ switch (code()) { ++ case lir_cmp_cmove: ++ break; ++ ++ default: ++ assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), ++ "can't produce oops from arith"); ++ } ++#endif ++} + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -308,6 +320,56 @@ + } + + ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(stub->entry()) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(stub) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(NULL) ++ , _stub(NULL) { ++} ++ 
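++// Float variant: control transfers to 'ublock' when the comparison is
++// unordered (i.e. one operand is NaN).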
++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(ublock) ++ , _stub(NULL) { ++} ++ ++void LIR_OpCmpBranch::change_block(BlockBegin* b) { ++ assert(_block != NULL, "must have old block"); ++ assert(_block->label() == label(), "must be equal"); ++ ++ _block = b; ++ _label = b->label(); ++} ++ ++void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { ++ assert(_ublock != NULL, "must have old block"); ++ ++ _ublock = b; ++} ++ ++void LIR_OpCmpBranch::negate_cond() { ++ switch (condition()) { ++ case lir_cond_equal: set_condition(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_condition(lir_cond_equal); break; ++ case lir_cond_less: set_condition(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_condition(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_condition(lir_cond_less); break; ++ case lir_cond_greater: set_condition(lir_cond_lessEqual); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, + LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, + bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, +@@ -509,10 +571,7 @@ + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); +-#ifdef PPC32 +- if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); +- if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +-#endif ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + do_stub(opConvert->_stub); + + break; +@@ -611,6 +670,25 @@ + break; + } + ++// LIR_OpCmpBranch; ++ case lir_cmp_branch: // may have info, input and result register always invalid ++ case lir_cmp_float_branch: // may have info, input and result register always invalid ++ { ++ assert(op->as_OpCmpBranch() != NULL, "must be"); ++ LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; ++ assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && ++ opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opCmpBranch->_info) do_info(opCmpBranch->_info); ++ if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); ++ if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); ++ if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); ++ if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); ++ assert(opCmpBranch->_result->is_illegal(), "not used"); ++ ++ break; ++ } ++ + // special handling for cmove: right input operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmove: +@@ -711,6 +789,29 @@ + break; + } + ++// LIR_Op4 ++ // special handling for cmp cmove: src2(opr4) operand must not be equal ++ // to the result operand, otherwise the backend fails ++ case lir_cmp_cmove: ++ { ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; ++ ++ assert(op4->_info == NULL, "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && ++ op4->_opr3->is_valid() && op4->_opr4->is_valid() && ++ op4->_result->is_valid(), "used"); ++ ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ do_input(op4->_opr3); ++ do_input(op4->_opr4); ++ do_temp(op4->_opr4); ++ do_output(op4->_result); ++ ++ 
break; ++ } ++ + // LIR_OpJavaCall + case lir_static_call: + case lir_optvirtual_call: +@@ -1028,6 +1129,13 @@ + masm->emit_op2(this); + } + ++void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { ++ masm->emit_opCmpBranch(this); ++ if (stub()) { ++ masm->append_code_stub(stub()); ++ } ++} ++ + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { + masm->emit_alloc_array(this); + masm->append_code_stub(stub()); +@@ -1048,6 +1156,10 @@ + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1424,8 +1536,7 @@ + if (deoptimize_on_null) { + // Emit an explicit null check and deoptimize if opr is null + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_null_check, Deoptimization::Action_none); +- cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); +- branch(lir_cond_equal, T_OBJECT, deopt); ++ cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); + } else { + // Emit an implicit null check + append(new LIR_Op1(lir_null_check, opr, info)); +@@ -1680,6 +1791,8 @@ + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; ++ case lir_cmp_branch: s = "cmp_branch"; break; ++ case lir_cmp_float_branch: s = "cmp_fbranch"; break; + case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; +@@ -1705,6 +1818,8 @@ + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; + case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmp_cmove: s = "cmp_cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1856,6 +1971,26 @@ + } + } + ++// LIR_OpCmpBranch ++void LIR_OpCmpBranch::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ if (block() != NULL) { ++ out->print("[B%d] ", block()->block_id()); ++ } else if (stub() != NULL) { ++ out->print("["); ++ stub()->print_name(out); ++ out->print(": " INTPTR_FORMAT "]", p2i(stub())); ++ if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); ++ } else { ++ out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); ++ } ++ if (ublock() != NULL) { ++ out->print("unordered: [B%d] ", ublock()->block_id()); ++ } ++} ++ + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { + switch(cond) { + case lir_cond_equal: out->print("[EQ]"); break; +@@ -1876,12 +2011,9 @@ + print_bytecode(out, bytecode()); + in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +-#ifdef PPC32 +- if(tmp1()->is_valid()) { +- tmp1()->print(out); out->print(" "); +- tmp2()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } +-#endif + } + + void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { +@@ -1978,6 +2110,19 @@ + result_opr()->print(out); + } + ++ ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ if (code() == lir_cmp_cmove) { ++ print_condition(out, condition()); out->print(" "); ++ } ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++ 
+ + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp +--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -480,13 +480,11 @@ + CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { + CodeStub* stub = new RangeCheckStub(range_check_info, index, array); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), +- index->as_jint(), null_check_info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch +- } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, array, +- arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), ++ index->as_jint(), stub, null_check_info); // forward branch ++ } else { ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), ++ T_INT, stub, null_check_info); // forward branch + } + } + +@@ -494,12 +492,11 @@ + void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + CodeStub* stub = new RangeCheckStub(info, index); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), ++ index->as_jint(), stub, info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, buffer, +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), ++ T_INT, stub, info); // forward branch + } + __ move(index, result); + } +@@ -935,7 +932,7 @@ + return tmp; + } + +-void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { ++void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { + if (if_instr->should_profile()) { + ciMethod* method = if_instr->profiled_method(); + assert(method != NULL, "method should be set if branch is profiled"); +@@ -956,10 +953,17 @@ + __ metadata2reg(md->constant_encoding(), md_reg); + + LIR_Opr data_offset_reg = new_pointer_register(); +- __ cmove(lir_cond(cond), +- LIR_OprFact::intptrConst(taken_count_offset), +- LIR_OprFact::intptrConst(not_taken_count_offset), +- data_offset_reg, as_BasicType(if_instr->x()->type())); ++ if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { ++ __ cmove(lir_cond(cond), ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } else { ++ __ cmp_cmove(lir_cond(cond), left, right, ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } + + // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
+ LIR_Opr data_reg = new_pointer_register(); +@@ -1316,8 +1320,8 @@ + } + + __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); +- __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); +- __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); ++ __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), ++ LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + } + + +@@ -1599,8 +1603,8 @@ + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check +@@ -1778,12 +1782,9 @@ + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new RangeCheckStub(info, index.result()); + if (index.result()->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); ++ cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, stub, info); + } + __ move(index.result(), result); + } else { +@@ -1861,8 +1862,8 @@ + } else if (use_length) { + // TODO: use a (modified) version of array_range_check that does not require a + // constant length to be loaded to a register +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // The range check performs the null check, so clear it out for the load +@@ -2235,19 +2236,14 @@ + int high_key = one_range->high_key(); + BlockBegin* dest = one_range->sux(); + if (low_key == high_key) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + } else if (high_key - low_key == 1) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); +- __ cmp(lir_cond_equal, value, high_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); + } else { + LabelObj* L = new LabelObj(); +- __ cmp(lir_cond_less, value, low_key); +- __ branch(lir_cond_less, T_INT, L->label()); +- __ cmp(lir_cond_lessEqual, value, high_key); +- __ branch(lir_cond_lessEqual, T_INT, dest); ++ __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); ++ __ 
cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); + __ branch_destination(L->label()); + } + } +@@ -2347,12 +2343,11 @@ + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, i + lo_key); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(i + lo_key), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2366,8 +2361,7 @@ + do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); + } else { + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, i + lo_key); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2405,12 +2399,11 @@ + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, x->key_at(i)); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(x->key_at(i)), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2425,8 +2418,7 @@ + } else { + int len = x->length(); + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, x->key_at(i)); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2936,8 +2928,8 @@ + f_val.dont_load_item(); + LIR_Opr reg = rlock_result(x); + +- __ cmp(lir_cond(x->cond()), left.result(), right.result()); +- __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); ++ __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), ++ t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } + + #ifdef JFR_HAVE_INTRINSICS +@@ -2981,8 +2973,7 @@ + __ move(LIR_OprFact::oopConst(NULL), result); + LIR_Opr jobj = new_register(T_METADATA); + __ move_wide(jobj_addr, jobj); +- __ cmp(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0)); +- __ branch(lir_cond_equal, T_OBJECT, L_end->label()); ++ __ cmp_branch(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0), T_OBJECT, L_end->label()); + + access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); + +@@ -3287,21 +3278,24 @@ + + void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { + if (compilation()->count_backedges()) { ++ LIR_Opr step = new_register(T_INT); ++ LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); ++ LIR_Opr zero = LIR_OprFact::intConst(0); + #if defined(X86) && !defined(_LP64) + // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. 
+ LIR_Opr left_copy = new_register(left->type()); + __ move(left, left_copy); + __ cmp(cond, left_copy, right); +-#else +- __ cmp(cond, left, right); +-#endif +- LIR_Opr step = new_register(T_INT); +- LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); +- LIR_Opr zero = LIR_OprFact::intConst(0); + __ cmove(cond, + (left_bci < bci) ? plus_one : zero, + (right_bci < bci) ? plus_one : zero, + step, left->type()); ++#else ++ __ cmp_cmove(cond, left, right, ++ (left_bci < bci) ? plus_one : zero, ++ (right_bci < bci) ? plus_one : zero, ++ step, left->type()); ++#endif + increment_backedge_counter(info, step, bci); + } + } +@@ -3340,8 +3334,7 @@ + // DeoptimizeStub will reexecute from the current state in code info. + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, + Deoptimization::Action_make_not_entrant); +- __ cmp(lir_cond_lessEqual, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_lessEqual, T_INT, deopt); ++ __ cmp_branch(lir_cond_lessEqual, result, LIR_OprFact::intConst(0), T_INT, deopt); + } + } + +@@ -3387,8 +3380,7 @@ + int freq = frequency << InvocationCounter::count_shift; + if (freq == 0) { + if (!step->is_constant()) { +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_notEqual, T_ILLEGAL, overflow); ++ __ cmp_branch(lir_cond_notEqual, step, LIR_OprFact::intConst(0), T_ILLEGAL, overflow); + } else { + __ branch(lir_cond_always, T_ILLEGAL, overflow); + } +@@ -3396,12 +3388,11 @@ + LIR_Opr mask = load_immediate(freq, T_INT); + if (!step->is_constant()) { + // If step is 0, make sure the overflow check below always fails +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); ++ __ cmp_cmove(lir_cond_notEqual, step, LIR_OprFact::intConst(0), ++ result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); + } + __ logical_and(result, mask, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_equal, T_INT, overflow); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); + } + __ branch_destination(overflow->continuation()); + } +@@ -3514,8 +3505,7 @@ + CodeEmitInfo *info = state_for(x, x->state()); + CodeStub* stub = new PredicateFailedStub(info); + +- __ cmp(lir_cond(cond), left, right); +- __ branch(lir_cond(cond), right->type(), stub); ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); + } + } + +@@ -3662,8 +3652,8 @@ + __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); +- __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); ++ __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), ++ value_fixed, value, value_fixed, T_BYTE); + value = value_fixed; + return value; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp +--- a/src/hotspot/share/c1/c1_LIRGenerator.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp 2024-01-30 10:00:11.948097125 +0800 +@@ -363,8 +363,10 @@ + void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr 
scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); + + // machine dependent +- void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); +- void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); ++ template <typename T> ++ void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); ++ template <typename T> ++ void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); + + void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); + +@@ -391,7 +393,7 @@ + + LIR_Opr safepoint_poll_register(); + +- void profile_branch(If* if_instr, If::Condition cond); ++ void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); + void increment_event_counter_impl(CodeEmitInfo* info, + ciMethod *method, LIR_Opr step, int frequency, + int bci, bool backedge, bool notify); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +--- a/src/hotspot/share/c1/c1_LIR.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIR.hpp 2024-01-30 10:00:11.948097125 +0800 +@@ -864,9 +864,11 @@ + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; ++class LIR_OpCmpBranch; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -933,6 +935,8 @@ + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++ , lir_cmp_branch ++ , lir_cmp_float_branch + , lir_cmove + , lir_add + , lir_sub +@@ -964,6 +968,9 @@ + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmp_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1128,12 +1135,14 @@ + virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } + virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } + virtual LIR_OpBranch* as_OpBranch() { return NULL; } ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } + virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } + virtual LIR_OpConvert* as_OpConvert() { return NULL; } + virtual LIR_Op0* as_Op0() { return NULL; } + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1463,15 +1472,18 @@ + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _stub(stub) +- , _bytecode(code) {} ++ , _bytecode(code) ++ , _tmp(tmp) {} + + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1626,7 +1638,7 @@ + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) 
{ +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1658,7 +1670,7 @@ + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1674,7 +1686,7 @@ + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1698,12 @@ + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); ++ return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); ++ _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1717,43 @@ + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpCmpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); ++ ++ // for unordered comparisons ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } ++ virtual void 
print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1767,6 +1818,48 @@ + }; + + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ ++ private: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Condition _condition; ++ ++ void verify() const; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) { ++ assert(is_in_range(code, begin_op4, end_op4), "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Condition condition() const { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; ++ } ++ void set_condition(LIR_Condition condition) { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; ++ } ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + //-------------------------------- + class LabelObj: public CompilationResourceObj { + private: +@@ -2115,7 +2208,9 @@ + + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); ++ } + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2146,6 +2241,15 @@ + cmp(condition, left, LIR_OprFact::intConst(right), info); + } + ++ // machine dependent ++ template <typename T> ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL); ++ template <typename T> ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); ++ } ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered); ++ + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +@@ -2153,6 +2257,9 @@ + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); + } + ++ // machine dependent ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type); ++ + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); + void cas_obj(LIR_Opr addr, LIR_Opr 
cmp_value, LIR_Opr new_value, +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +--- a/src/hotspot/share/code/nmethod.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/nmethod.cpp 2024-01-30 10:00:11.968096887 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "code/codeCache.hpp" +@@ -2155,7 +2161,8 @@ + //verify_interrupt_point(iter.addr()); + break; + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + address destination = iter.reloc()->value(); + // Right now there is no way to find out which entries support + // an interrupt point. It would be nice if we had this +@@ -2392,7 +2399,8 @@ + return st.as_string(); + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + stringStream st; + st.print("runtime_call"); + CallRelocation* r = (CallRelocation*)iter.reloc(); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp +--- a/src/hotspot/share/code/relocInfo.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/relocInfo.cpp 2024-01-30 10:00:11.971430179 +0800 +@@ -433,6 +433,7 @@ + _cached_value = x0==0? NULL: address_from_scaled_offset(x0, point); + } + ++#ifndef MIPS64 + void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +@@ -441,6 +442,7 @@ + void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; + } ++#endif + + void static_stub_Relocation::pack_data_to(CodeSection* dest) { + short* p = (short*) dest->locs_end(); +@@ -910,7 +912,7 @@ + break; + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { + CallRelocation* r = (CallRelocation*) reloc(); + tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp +--- a/src/hotspot/share/code/relocInfo.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/relocInfo.hpp 2024-01-30 10:00:11.971430179 +0800 +@@ -269,7 +269,11 @@ + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#ifndef MIPS64 + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool ++#else ++ internal_pc_type = 14, // tag for internal data ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -304,13 +308,13 @@ + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ +- visitor(runtime_call_w_cp) \ ++ NOT_MIPS64(visitor(runtime_call_w_cp)) \ + visitor(external_word) \ + visitor(internal_word) \ + 
visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) + + + public: +@@ -1174,6 +1178,15 @@ + }; + + ++#ifdef MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ relocInfo::relocType type() { return relocInfo::internal_pc_type; } ++ public: ++ address pc() { return pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++}; ++#else + class runtime_call_w_cp_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::runtime_call_w_cp_type; } + +@@ -1202,6 +1215,7 @@ + void pack_data_to(CodeSection * dest); + void unpack_data(); + }; ++#endif + + // Trampoline Relocations. + // A trampoline allows to encode a small branch in the code, even if there +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp +--- a/src/hotspot/share/code/vtableStubs.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/vtableStubs.cpp 2024-01-30 10:00:11.971430179 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/vtableStubs.hpp" + #include "compiler/compileBroker.hpp" +@@ -98,7 +104,11 @@ + + #if defined(PRODUCT) + // These values are good for the PRODUCT case (no tracing). ++#if defined MIPS64 || defined LOONGARCH64 ++ static const int first_vtableStub_size = 128; ++#else + static const int first_vtableStub_size = 64; ++#endif + static const int first_itableStub_size = 256; + #else + // These values are good for the non-PRODUCT case (when tracing can be switched on). +@@ -109,6 +119,7 @@ + // vtable itable + // aarch64: 460 324 + // arm: ? ? ++ // mips64: 728 328 + // ppc (linux, BE): 404 288 + // ppc (linux, LE): 356 276 + // ppc (AIX): 416 296 +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp 2024-01-30 10:00:11.981430060 +0800 +@@ -74,7 +74,6 @@ + // Read the marking-in-progress flag. 
+ LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -103,7 +102,7 @@ + slow = new G1PreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -168,10 +167,9 @@ + } + assert(new_val->is_register(), "must be a register at this point"); + +- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); +- + CodeStub* slow = new G1PostBarrierStub(addr, new_val); +- __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); ++ __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), ++ LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ branch_destination(slow->continuation()); + } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +--- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp 2024-01-30 10:00:11.991429941 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + +@@ -71,6 +77,7 @@ + _oop_stack.push(obj); + assert(_bitmap->is_marked(obj), "Must be marked now - map self"); + } else { ++ DEBUG_ONLY(OrderAccess::loadload()); + assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), + "Must be marked by other or closed archive object"); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +--- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp 2024-01-30 10:00:12.004763116 +0800 +@@ -51,8 +51,9 @@ + inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + if (p != NULL) { // XXX: error if p != NULL here + oop o = RawAccess::oop_load(p); +- if (o->is_forwarded()) { +- o = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ o = (oop) m->decode_pointer(); + // Card mark + if (PSScavenge::is_obj_in_young(o)) { + PSScavenge::card_table()->inline_write_ref_field_gc(p, o); +@@ -282,13 +283,17 @@ + assert(should_scavenge(p, true), "revisiting object?"); + + oop o = RawAccess::oop_load(p); +- oop new_obj = o->is_forwarded() +- ? o->forwardee() +- : copy_to_survivor_space(o); ++ oop new_obj; ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); ++ } else { ++ new_obj = copy_to_survivor_space(o); ++ } + + // This code must come after the CAS test, or it will print incorrect + // information. 
+- if (log_develop_is_enabled(Trace, gc, scavenge) && o->is_forwarded()) { ++ if (log_develop_is_enabled(Trace, gc, scavenge) && m->is_marked()) { + log_develop_trace(gc, scavenge)("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", + "forwarding", + new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +--- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp 2024-01-30 10:00:12.008096410 +0800 +@@ -104,8 +104,9 @@ + + oop o = *p; + oop new_obj; +- if (o->is_forwarded()) { +- new_obj = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); + } else { + new_obj = _pm->copy_to_survivor_space(o); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +--- a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp 2024-01-30 10:00:12.011429704 +0800 +@@ -192,8 +192,7 @@ + /* Normalize boolean value returned by unsafe operation, i.e., value != 0 ? value = true : value false. */ + if (mask_boolean) { + LabelObj* equalZeroLabel = new LabelObj(); +- __ cmp(lir_cond_equal, result, 0); +- __ branch(lir_cond_equal, T_BOOLEAN, equalZeroLabel->label()); ++ __ cmp_branch(lir_cond_equal, result, 0, T_BOOLEAN, equalZeroLabel->label()); + __ move(LIR_OprFact::intConst(1), result); + __ branch_destination(equalZeroLabel->label()); + } +@@ -320,14 +319,12 @@ + referent_off = gen->new_register(T_LONG); + __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); + } +- __ cmp(lir_cond_notEqual, offset, referent_off); +- __ branch(lir_cond_notEqual, offset->type(), cont->label()); ++ __ cmp_branch(lir_cond_notEqual, offset, referent_off, offset->type(), cont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue +- __ cmp(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, cont->label()); ++ __ cmp_branch(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, cont->label()); + } + LIR_Opr src_klass = gen->new_register(T_METADATA); + if (gen_type_check) { +@@ -337,8 +334,7 @@ + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = gen->new_register(T_INT); + __ move(reference_type_addr, reference_type); +- __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); +- __ branch(lir_cond_equal, T_INT, cont->label()); ++ __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, cont->label()); + } + } + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +--- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp 2024-01-30 10:00:12.011429704 +0800 +@@ -89,8 +89,7 @@ + __ move(card_addr, cur_value); + + LabelObj* 
L_already_dirty = new LabelObj(); +- __ cmp(lir_cond_equal, cur_value, dirty); +- __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); ++ __ cmp_branch(lir_cond_equal, cur_value, dirty, T_BYTE, L_already_dirty->label()); + __ move(dirty, card_addr); + __ branch_destination(L_already_dirty->label()); + } else { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp 2024-01-30 10:00:12.021429583 +0800 +@@ -73,7 +73,6 @@ + // Read the marking-in-progress flag. + LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -101,7 +100,7 @@ + slow = new ShenandoahPreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -144,10 +143,9 @@ + __ logical_and(flag_val, mask_reg, masked_flag); + flag_val = masked_flag; + } +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2); +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + + return result; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp 2024-01-30 10:00:12.031429464 +0800 +@@ -105,15 +105,20 @@ + + virtual void visit(LIR_OpVisitState* state) { + state->do_input(_opr); ++ if (_result->is_valid()) { ++ state->do_temp(_opr); ++ state->do_output(_result); ++ } + } + + virtual void emit_code(LIR_Assembler* ce) { +- ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr); ++ ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr, result_opr()); + } + + virtual void print_instr(outputStream* out) const { + _opr->print(out); + out->print(" "); ++ result_opr()->print(out); + } + + #ifndef PRODUCT +@@ -149,13 +154,21 @@ + #endif + + void ZBarrierSetC1::load_barrier(LIRAccess& access, LIR_Opr result) const { ++ LIR_Op* op = new LIR_OpZLoadBarrierTest(result); ++ + // Fast path +- __ append(new LIR_OpZLoadBarrierTest(result)); ++ __ append(op); + + // Slow path + const address runtime_stub = load_barrier_on_oop_field_preloaded_runtime_stub(access.decorators()); + CodeStub* const stub = new ZLoadBarrierStubC1(access, result, runtime_stub); +- __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ if (ZPlatformLoadBarrierTestResultInRegister) { ++ LIR_Opr res = access.gen()->new_register(result->type()); ++ op->set_result_opr(res); ++ __ cmp_branch(lir_cond_notEqual, res, LIR_OprFact::intptrConst(NULL_WORD), T_ADDRESS, stub); ++ } else { ++ __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ } + __ branch_destination(stub->continuation()); + } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck 
a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp 2024-01-30 10:00:12.041429345 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.inline.hpp" + #include "classfile/systemDictionary.hpp" +@@ -1497,7 +1503,7 @@ + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp 2024-01-30 10:00:12.041429345 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -146,7 +152,7 @@ + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp 2024-01-30 10:00:12.044762639 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -114,9 +120,9 @@ + void restore_native_result(void); + #endif // SPARC + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +-#endif // AARCH64 ++#endif // AARCH64 || MIPS64 || LOONGARCH64 + + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp 2024-01-30 10:00:12.054762520 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + #define SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + +@@ -102,7 +108,7 @@ + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2024-01-30 10:00:12.061429106 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "compiler/abstractCompiler.hpp" +@@ -714,6 +720,35 @@ + + #endif + ++ ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ declare_constant(VM_Version::CPU_LA32) \ ++ declare_constant(VM_Version::CPU_LA64) \ ++ declare_constant(VM_Version::CPU_LLEXC) \ ++ declare_constant(VM_Version::CPU_SCDLY) \ ++ declare_constant(VM_Version::CPU_LLDBAR) \ ++ declare_constant(VM_Version::CPU_LBT_X86) \ ++ declare_constant(VM_Version::CPU_LBT_ARM) \ ++ declare_constant(VM_Version::CPU_LBT_MIPS) \ ++ declare_constant(VM_Version::CPU_CCDMA) \ ++ declare_constant(VM_Version::CPU_COMPLEX) \ ++ declare_constant(VM_Version::CPU_FP) \ ++ declare_constant(VM_Version::CPU_CRYPTO) \ ++ declare_constant(VM_Version::CPU_LSX) \ ++ declare_constant(VM_Version::CPU_LASX) \ ++ declare_constant(VM_Version::CPU_LAM) \ ++ declare_constant(VM_Version::CPU_LLSYNC) \ ++ declare_constant(VM_Version::CPU_TGTSYNC) \ ++ declare_constant(VM_Version::CPU_ULSYNC) \ ++ declare_constant(VM_Version::CPU_UAL) ++ ++#endif ++ + + #ifdef X86 + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp +--- a/src/hotspot/share/memory/metaspace.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/memory/metaspace.cpp 2024-01-30 10:00:12.064762400 +0800 +@@ -1083,12 +1083,12 @@ + // Don't use large pages for the class space. + bool large_pages = false; + +-#if !(defined(AARCH64) || defined(PPC64)) ++#if !(defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64)) + ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr); +-#else // AARCH64 || PPC64 ++#else // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + ReservedSpace metaspace_rs; + +@@ -1114,7 +1114,8 @@ + // below 32g to get a zerobased CCS. For simplicity we reuse the search + // strategy for AARCH64. + +- size_t increment = AARCH64_ONLY(4*)G; ++ // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. ++ size_t increment = AARCH64_ONLY(4*)MIPS64_ONLY(4*)LOONGARCH64_ONLY(4*)G; + for (char *a = align_up(requested_addr, increment); + a < (char*)(1024*G); + a += increment) { +@@ -1145,7 +1146,7 @@ + } + } + +-#endif // AARCH64 || PPC64 ++#endif // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + if (!metaspace_rs.is_reserved()) { + #if INCLUDE_CDS +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp +--- a/src/hotspot/share/oops/oop.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/oops/oop.inline.hpp 2024-01-30 10:00:12.074762281 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -389,7 +395,7 @@ + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop)oldMark->decode_pointer(); + } + + // Note that the forwardee is not the same thing as the displaced_mark. +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp +--- a/src/hotspot/share/opto/compile.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/opto/compile.hpp 2024-01-30 10:00:12.081428868 +0800 +@@ -1204,7 +1204,7 @@ + bool in_scratch_emit_size() const { return _in_scratch_emit_size; } + + enum ScratchBufferBlob { +-#if defined(PPC64) ++#if defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + MAX_inst_size = 2048, + #else + MAX_inst_size = 1024, +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp +--- a/src/hotspot/share/opto/output.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/opto/output.cpp 2024-01-30 10:00:12.094762043 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -731,6 +737,27 @@ + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = NULL; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to the last instruction in the safepoint. ++ // On x86 and SPARC, a safepoint contains only one instruction. ++ // However, on MIPS we should add the size of the safepoint to current_offset. ++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(_regalloc) - 4; ++#endif + debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1393,6 +1420,22 @@ + DEBUG_ONLY(uint instr_offset = cb->insts_size()); + n->emit(*cb, _regalloc); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually an instruction sequence) is sometimes ++ // not the instruction that accesses memory, so an adjustment is needed. previous_offset points to the ++ // instruction that accesses memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of the current instruction.
++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +--- a/src/hotspot/share/opto/type.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/opto/type.cpp 2024-01-30 10:00:12.101428630 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -78,6 +84,12 @@ + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY ++ { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other + { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +--- a/src/hotspot/share/runtime/java.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/java.cpp 2024-01-30 10:00:12.118095097 +0800 +@@ -68,6 +68,7 @@ + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" + #include "runtime/vmOperations.hpp" ++#include "runtime/vmThread.hpp" + #include "services/memTracker.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/globalDefinitions.hpp" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp +--- a/src/hotspot/share/runtime/objectMonitor.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/objectMonitor.cpp 2024-01-30 10:00:12.121428391 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/vmSymbols.hpp" + #include "jfr/jfrEvents.hpp" +@@ -308,6 +314,9 @@ + } + + assert(_owner != Self, "invariant"); ++ // The load of _succ for the "_succ != current" assertion may be reordered before the preceding ++ // "if (_succ == current) _succ = nullptr"; the expected order is to clear _succ first and then assert.
++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(Self->is_Java_thread(), "invariant"); + JavaThread * jt = (JavaThread *) Self; +@@ -469,6 +478,7 @@ + } + + // The Spin failed -- Enqueue and park the thread ... ++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(_owner != Self, "invariant"); + assert(_Responsible != Self, "invariant"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +--- a/src/hotspot/share/runtime/os.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/os.cpp 2024-01-30 10:00:12.121428391 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -1242,7 +1248,8 @@ + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; + + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || ++ // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp 2024-01-30 10:00:12.128094978 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ ++ + #include "precompiled.hpp" + #include "jni.h" + #include "runtime/interfaceSupport.inline.hpp" +@@ -512,6 +519,14 @@ + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + ++#if defined(MIPS)|| defined(LOONGARCH) ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp +--- a/src/hotspot/share/utilities/globalDefinitions.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/utilities/globalDefinitions.hpp 2024-01-30 10:00:12.141428153 +0800 +@@ -1161,6 +1161,15 @@ + return log2_long(x); + } + ++#if defined(MIPS64) || defined(LOONGARCH64) ++// returns integer round-up to the nearest multiple of s (s must be a power of two) ++inline intptr_t round_to(intptr_t x, uintx s) { ++ assert(is_power_of_2(s), "s must be a power of 2: " JLONG_FORMAT, x); ++ const uintx m = s - 1; ++ return mask_bits(x + m, ~m); ++} ++#endif ++ + inline bool is_odd (intx x) { return x & 1; } + inline bool is_even(intx x) { return !is_odd(x); } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +--- a/src/hotspot/share/utilities/macros.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/utilities/macros.hpp 2024-01-30 10:00:12.144761447 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP + #define SHARE_VM_UTILITIES_MACROS_HPP + +@@ -535,6 +541,38 @@ + #define NOT_SPARC(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ ++#if defined(MIPS64) || defined(LOONGARCH64) ++#define LOONGARCH64_AND_MIPS64_ONLY(code) code ++#define NOT_LOONGARCH64_AND_MIPS64(code) ++#else ++#define LOONGARCH64_AND_MIPS64_ONLY(code) ++#define NOT_LOONGARCH64_AND_MIPS64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +@@ -627,16 +665,34 @@ + // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp + // + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) ++#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) ++#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) ++#else + #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) + #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) + #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) + #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) + #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) ++#else + #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) + #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) + #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h 2024-01-30 10:00:13.224748568 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,13 +44,17 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) + #include + #define user_regs_struct pt_regs + #endif ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c 2024-01-30 10:00:13.224748568 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #include + #include "libproc.h" + #include "proc_service.h" +@@ -54,10 +61,18 @@ + #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h" + #endif + ++#if defined(mips64) || defined(mips64el) ++#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" ++#endif ++ + #ifdef aarch64 + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +412,7 @@ + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(loongarch64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,9 +440,15 @@ + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif ++#if defined(mips64) || defined(mips64el) ++#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG ++#endif + + + array = (*env)->NewLongArray(env, NPRGREG); +@@ -534,6 +555,18 @@ + } + #endif /* aarch64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +@@ -574,6 +607,45 @@ + + #endif + ++#if defined(mips64) || defined(mips64el) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg ++ ++ regs[REG_INDEX(ZERO)] = gregs.regs[0]; ++ regs[REG_INDEX(AT)] = gregs.regs[1]; ++ regs[REG_INDEX(V0)] = gregs.regs[2]; ++ regs[REG_INDEX(V1)] = gregs.regs[3]; ++ regs[REG_INDEX(A0)] = gregs.regs[4]; ++ regs[REG_INDEX(A1)] = gregs.regs[5]; ++ regs[REG_INDEX(A2)] = gregs.regs[6]; ++ regs[REG_INDEX(A3)] = gregs.regs[7]; ++ regs[REG_INDEX(T0)] = gregs.regs[8]; ++ regs[REG_INDEX(T1)] = gregs.regs[9]; ++ regs[REG_INDEX(T2)] = gregs.regs[10]; ++ regs[REG_INDEX(T3)] = gregs.regs[11]; ++ regs[REG_INDEX(T4)] = gregs.regs[12]; ++ regs[REG_INDEX(T5)] = gregs.regs[13]; ++ regs[REG_INDEX(T6)] = gregs.regs[14]; ++ regs[REG_INDEX(T7)] = gregs.regs[15]; ++ regs[REG_INDEX(S0)] = gregs.regs[16]; ++ regs[REG_INDEX(S1)] = gregs.regs[17]; ++ regs[REG_INDEX(S2)] = gregs.regs[18]; ++ regs[REG_INDEX(S3)] = gregs.regs[19]; ++ regs[REG_INDEX(S4)] = gregs.regs[20]; ++ regs[REG_INDEX(S5)] = gregs.regs[21]; ++ regs[REG_INDEX(S6)] = gregs.regs[22]; ++ regs[REG_INDEX(S7)] = gregs.regs[23]; ++ regs[REG_INDEX(T8)] = gregs.regs[24]; ++ regs[REG_INDEX(T9)] = gregs.regs[25]; ++ regs[REG_INDEX(K0)] = gregs.regs[26]; ++ regs[REG_INDEX(K1)] = gregs.regs[27]; ++ regs[REG_INDEX(GP)] = gregs.regs[28]; ++ regs[REG_INDEX(SP)] = gregs.regs[29]; ++ regs[REG_INDEX(FP)] = gregs.regs[30]; ++ regs[REG_INDEX(S8)] = gregs.regs[30]; ++ regs[REG_INDEX(RA)] = gregs.regs[31]; ++#endif /* mips */ ++ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c 2024-01-30 10:00:13.224748568 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has 
been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -142,7 +148,7 @@ + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java 2024-01-30 10:00:13.238081742 +0800 +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.io.*; +@@ -34,12 +40,16 @@ + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -102,7 +112,21 @@ + Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); + if (pc == null) return null; + return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("ppc64")) { + PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); + if (sp == null) return null; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java 2024-01-30 10:00:13.241415036 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.lang.reflect.*; +@@ -30,6 +36,8 @@ + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -40,7 +48,11 @@ + return new LinuxAMD64ThreadContext(dbg); + } else if (cpu.equals("sparc")) { + return new LinuxSPARCThreadContext(dbg); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); ++ } else if (cpu.equals("ppc64")) { + return new LinuxPPC64ThreadContext(dbg); + } else { + try { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address nextFP; ++ Address nextPC; ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ try { ++ nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextFP == null) { ++ return null; ++ } ++ ++ try { ++ nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextPC == null) { ++ return null; ++ } ++ ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java 2024-01-30 10:00:13.234748449 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java 2024-01-30 10:00:13.234748449 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java 2024-01-30 10:00:13.241415036 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.posix.elf; + + import java.io.FileInputStream; +@@ -63,6 +69,8 @@ + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. 
*/ + public short getFileType(); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. 
++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java 2024-01-30 10:00:13.241415036 +0800 +@@ -32,11 +32,13 @@ + import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; ++import sun.jvm.hotspot.debugger.proc.mips64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.ppc64.*; + import sun.jvm.hotspot.debugger.proc.x86.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.amd64.*; ++import sun.jvm.hotspot.debugger.mips64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; +@@ -90,6 +92,10 @@ + threadFactory = new ProcAMD64ThreadFactory(this); + pcRegIndex = AMD64ThreadContext.RIP; + fpRegIndex = AMD64ThreadContext.RBP; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new ProcMIPS64ThreadFactory(this); ++ pcRegIndex = MIPS64ThreadContext.PC; ++ fpRegIndex = MIPS64ThreadContext.FP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java 2024-01-30 10:00:13.241415036 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.remote; + + import java.rmi.*; +@@ -34,6 +40,8 @@ + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. +@@ -76,6 +84,16 @@ + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java 2024-01-30 10:00:13.228081861 +0800 +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -39,6 +45,8 @@ + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -598,6 +606,10 @@ + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return null; ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public Address getLastFP(Address addr) { ++ return getLastSP(addr).getAddressAt(0); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

++   Should be able to be used on all loongarch64 platforms we support
++   (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's
++   "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext;
++   output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the
++   LOONGARCH64Frame is left to the caller, since we may need to subclass
++   LOONGARCH64Frame to support signal handler frames on Unix platforms.
++
++   Algorithm is to walk up the stack within a given range (say,
++   512K at most) looking for a plausible PC and SP for a Java frame,
++   also considering those coming in from the context. If we find a PC
++   that belongs to the VM (i.e., in generated code like the
++   interpreter or CodeCache) then we try to find an associated EBP.
++   We repeat this until we either find a complete frame or run out of
++   stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,526 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. 
*/ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int JAVA_FRAME_LINK_OFFSET = 0; ++ private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; ++ private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_LINK_OFFSET = -2; ++ private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; ++ private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. 
++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); ++ return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address 
addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all mips64 platforms we support ++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's ++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext; ++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the ++ MIPS64Frame is left to the caller, since we may need to subclass ++ MIPS64Frame to support signal handler frames on Unix platforms. ++ ++ Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated EBP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.
*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it.
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,537 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = 
db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new VMReg(5); ++ } else { ++ rbp = new VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java 2024-01-30 10:00:13.268081384 +0800 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java 2024-01-30 10:00:13.264748090 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.runtime; + + import java.util.*; +@@ -39,6 +45,8 @@ + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; +@@ -99,6 +107,10 @@ + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2024-01-30 10:00:13.274747971 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -54,7 +61,7 @@ + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -101,6 +108,12 @@ + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static java.util.Collections.emptyMap; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++import java.util.Map; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ protected EnumSet computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. ++ EnumSet features = EnumSet.noneOf(LoongArch64.CPUFeature.class); ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) { ++ features.add(LoongArch64.CPUFeature.LA32); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) { ++ features.add(LoongArch64.CPUFeature.LA64); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLEXC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) { ++ features.add(LoongArch64.CPUFeature.SCDLY); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) { ++ features.add(LoongArch64.CPUFeature.LLDBAR); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_X86); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_ARM); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_MIPS); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) { ++ features.add(LoongArch64.CPUFeature.CCDMA); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) { ++ features.add(LoongArch64.CPUFeature.COMPLEX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64FP) != 0) { ++ features.add(LoongArch64.CPUFeature.FP); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) { ++ features.add(LoongArch64.CPUFeature.CRYPTO); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) { ++ features.add(LoongArch64.CPUFeature.LSX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) { ++ features.add(LoongArch64.CPUFeature.LASX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) { ++ features.add(LoongArch64.CPUFeature.LAM); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.TGTSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.ULSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64UAL) != 
0) { ++ features.add(LoongArch64.CPUFeature.UAL); ++ } ++ ++ return features; ++ } ++ ++ protected EnumSet computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); ++ ++ if (config.useLSX) { ++ flags.add(LoongArch64.Flag.useLSX); ++ } ++ ++ if (config.useLASX) { ++ flags.add(LoongArch64.Flag.useLASX); ++ } ++ ++ return flags; ++ } ++ ++ protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { ++ return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "loongarch64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == null; ++ LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck 
a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList<Register> list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); ++ ++ public static final Register heapBaseRegister = s5; ++ public static final Register TREG = s6; ++ ++ private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List<Register> reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set<Register> callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ floatParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory<?> valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter 
locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return floatParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory<?> valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentFloat = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentFloat < floatParameterRegisters.size()) { ++ Register register = floatParameterRegisters.get(currentFloat++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } else if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind<?> valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? 
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. 
++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. ++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java 2024-01-30 10:00:13.308080909 +0800 +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet<CPUFeature> features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */ ++ public enum Flag { ++ useLSX, ++ useLASX ++ } ++ ++ private final EnumSet<Flag> flags; ++ ++ public LoongArch64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) { ++ super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ public EnumSet<CPUFeature> getFeatures() { ++ return features; ++ } ++ ++ public EnumSet<Flag> getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return LoongArch64Kind.BYTE; ++ case Short: ++ case Char: ++ return LoongArch64Kind.WORD; ++ case Int: ++ return LoongArch64Kind.DWORD; ++ case Long: ++ case Object: ++ return LoongArch64Kind.QWORD; ++ case Float: ++ return LoongArch64Kind.SINGLE; ++ case Double: ++ return LoongArch64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ LoongArch64Kind kind = (LoongArch64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public LoongArch64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return LoongArch64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return LoongArch64Kind.V256_QWORD; ++ } else { ++ return null; ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java 2024-01-30 10:00:13.308080909 +0800 +@@ -0,0 +1,163 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.loongarch64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum LoongArch64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final LoongArch64Kind scalar; ++ private final EnumKey<LoongArch64Kind> key = new EnumKey<>(this); ++ ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java 2024-01-30 10:00:13.308080909 +0800 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java 2024-01-30 10:00:13.314747494 +0800 +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to jdk.internal.vm.compiler; + exports jdk.vm.ci.runtime to +@@ -37,6 +43,7 @@ + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile +--- a/src/utils/hsdis/Makefile 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/utils/hsdis/Makefile 2024-01-30 10:00:13.851407763 +0800 +@@ -94,6 +94,9 @@ + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java 2024-01-30 10:00:13.961406452 +0800 +@@ -22,11 +22,17 @@ + */ + + /* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ ++/* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management +- * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled ++ * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java 2024-01-30 10:00:13.961406452 +0800 +@@ -22,13 +22,19 @@ + */ + + /* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++/* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management + * + * @build sun.hotspot.WhiteBox +- * @requires !(vm.cpu.features ~= ".*aes.*") ++ * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") + * @requires vm.compiler1.enabled | !vm.graal.enabled + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java 2024-01-30 10:00:13.974739627 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.intrinsics.sha.cli.testcases; + + import compiler.intrinsics.sha.cli.SHAOptionsBase; +@@ -32,19 +38,20 @@ + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, PPC, S390x, SPARC, LoongArch64 and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC, LoongArch64 and X86. 
+ super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, ++ new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -29,6 +29,7 @@ + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; + import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; + import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; +@@ -37,6 +38,7 @@ + import jdk.vm.ci.runtime.JVMCI; + import jdk.vm.ci.runtime.JVMCIBackend; + import jdk.vm.ci.sparc.SPARC; ++import jdk.vm.ci.loongarch64.LoongArch64; + import org.junit.Assert; + + import java.lang.reflect.Method; +@@ -72,6 +74,8 @@ + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof SPARC) { + return new SPARCTestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck 
a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java 2024-01-30 10:00:13.984739508 +0800 +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void 
emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ 
// Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, 
int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ 
emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java 
amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. 
jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java 2024-01-30 10:00:14.021405737 +0800 +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* @test + * @bug 8167409 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java 2024-01-30 10:00:14.021405737 +0800 +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + /* @test + * @bug 8167408 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2024-01-30 10:00:14.024739030 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.testlibrary.sha.predicate; + + import jdk.test.lib.Platform; +@@ -63,10 +69,12 @@ + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +@@ -74,12 +82,14 @@ + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2024-01-30 10:00:14.074738434 +0800 +@@ -22,6 +22,12 @@ + */ + + /* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++/* + * @test ReservedStackTest + * + * @requires vm.opt.DeoptimizeALot != true +@@ -239,7 +245,7 @@ + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isMIPS() || Platform.isLoongArch64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java 2024-01-30 10:00:14.158070775 +0800 +@@ -45,7 +45,7 @@ + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java 2024-01-30 10:00:15.451388695 +0800 +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.jfr.event.os; + + import java.util.List; +@@ -54,8 +60,8 @@ + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + } + } + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java +--- a/test/jdk/sun/security/pkcs11/PKCS11Test.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java 2024-01-30 10:00:15.654719606 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // common infrastructure for SunPKCS11 tests + + import java.io.BufferedReader; +@@ -747,6 +753,9 @@ + "/usr/lib64/" }); + osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); + osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[] {}); + osMap.put("Windows-amd64-64", new String[] {}); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +--- a/test/lib/jdk/test/lib/Platform.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/lib/jdk/test/lib/Platform.java 2024-01-30 10:00:16.081381187 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package jdk.test.lib; + + import java.io.FileNotFoundException; +@@ -226,6 +232,14 @@ + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ + public static String getOsArch() { + return osArch; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java +--- a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java 2024-01-30 10:00:16.094714362 +0800 +@@ -0,0 +1,87 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++package org.openjdk.bench.java.lang; ++ ++import java.util.Random; ++import java.util.concurrent.TimeUnit; ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.Blackhole; ++ ++@OutputTimeUnit(TimeUnit.MILLISECONDS) ++@State(Scope.Thread) ++@BenchmarkMode(Mode.Throughput) ++public class RotateBenchmark { ++ ++ @Param({"1024"}) ++ public int TESTSIZE; ++ ++ @Param({"20"}) ++ public int SHIFT; ++ ++ public long [] larr; ++ public int [] iarr; ++ ++ public long [] lres; ++ public int [] ires; ++ ++ ++ @Setup(Level.Trial) ++ public void BmSetup() { ++ Random r = new Random(1024); ++ larr = new long[TESTSIZE]; ++ iarr = new int[TESTSIZE]; ++ lres = new long[TESTSIZE]; ++ ires = new int[TESTSIZE]; ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ larr[i] = r.nextLong(); ++ } ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ iarr[i] = r.nextInt(); ++ } ++ } ++ ++ @Benchmark ++ public void testRotateLeftI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateLeft(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateRight(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateLeftL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateLeft(larr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateRight(larr[i], SHIFT); ++ } ++ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java +--- a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java 2024-01-30 10:00:16.094714362 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package org.openjdk.bench.vm.compiler; ++ ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.*; ++ ++import java.util.concurrent.TimeUnit; ++import java.util.Random; ++ ++@BenchmarkMode(Mode.Throughput) ++@OutputTimeUnit(TimeUnit.SECONDS) ++@State(Scope.Thread) ++public class MacroLogicOpt { ++ @Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN; ++ ++ private int [] ai = new int[VECLEN]; ++ private int [] bi = new int[VECLEN]; ++ private int [] ci = new int[VECLEN]; ++ private int [] ri = new int[VECLEN]; ++ ++ private long [] al = new long[VECLEN]; ++ private long [] bl = new long[VECLEN]; ++ private long [] cl = new long[VECLEN]; ++ private long [] dl = new long[VECLEN]; ++ private long [] el = new long[VECLEN]; ++ private long [] fl = new long[VECLEN]; ++ private long [] rl = new long[VECLEN]; ++ ++ private Random r = new Random(); ++ ++ @Setup ++ public void init() { ++ ai = new int[VECLEN]; ++ bi = new int[VECLEN]; ++ ci = new int[VECLEN]; ++ ri = new int[VECLEN]; ++ ++ al = new long[VECLEN]; ++ bl = new long[VECLEN]; ++ cl = new long[VECLEN]; ++ dl = new long[VECLEN]; ++ el = new long[VECLEN]; ++ fl = new long[VECLEN]; ++ rl = new long[VECLEN]; ++ for (int i=0; i FINGERPRINT_MAP = new HashMap<>() { diff --git a/jdk-updates-jdk11u-jdk-11.0.22-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.22-ga.tar.xz deleted file mode 100644 index 78684a6b6786cdb8bb6da27dfcda3fed126946db..0000000000000000000000000000000000000000 Binary files a/jdk-updates-jdk11u-jdk-11.0.22-ga.tar.xz and /dev/null differ diff --git a/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..921bdb22895680fb23af8e04daff21e532027915 Binary files /dev/null and b/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index 720b4d76e7179a06ba5867015d3f2b9c82245b8e..f32c013106d8515c2e8db7df741f337f8956f859 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -55,6 +55,7 @@ %global aarch64 aarch64 %global riscv64 riscv64 +%global ppc64le ppc64le # By default, we build a debug build during main build on JIT architectures %if %{with slowdebug} @@ -110,15 +111,21 @@ %ifarch %{aarch64} %global archinstall aarch64 %endif +%ifarch loongarch64 +%global archinstall loongarch64 +%endif %ifarch %{riscv64} %global archinstall riscv64 %endif +%ifarch %{ppc64le} +%global archinstall ppc64le +%endif %global with_systemtap 1 # New Version-String scheme-style defines %global majorver 11 -%global securityver 22 +%global securityver 23 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place @@ -134,12 +141,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 7 +%global buildver 9 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.22-ga +%global revision jdk-11.0.23-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to
1, so debug ones can have 0 @@ -561,9 +568,11 @@ exit 0 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jstatd %{_jvmdir}/%{sdkdir -- %{?1}}/bin/rmic %{_jvmdir}/%{sdkdir -- %{?1}}/bin/serialver +%ifnarch loongarch64 %ifarch %{aarch64} x86_64 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jaotc %endif +%endif %{_jvmdir}/%{sdkdir -- %{?1}}/include %{_jvmdir}/%{sdkdir -- %{?1}}/lib/ct.sym %if %{with_systemtap} @@ -803,6 +812,14 @@ Source11: nss.cfg.in # due to memory leak). Patch1000: rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch +############################################ +# +# LoongArch64 specific patches +# +############################################ + +Patch2001: LoongArch64-support.patch + ############################################# # # OpenJDK specific patches @@ -893,7 +910,7 @@ Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch # riscv64 specific patches # ############################################ -Patch2000: 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +Patch2000: Add-riscv64-support.patch BuildRequires: elfutils-extra BuildRequires: autoconf @@ -1126,6 +1143,7 @@ fi pushd %{top_level_dir_name} # OpenJDK patches +%ifnarch loongarch64 ppc64le %ifarch riscv64 %patch2000 -p1 %else @@ -1190,6 +1208,10 @@ pushd %{top_level_dir_name} %patch91 -p1 %patch92 -p1 %endif +%endif +%ifarch loongarch64 +%patch2001 -p1 +%endif popd # openjdk # %patch1000 @@ -1280,8 +1302,10 @@ bash ../configure \ --with-version-build=%{buildver} \ --with-version-pre="" \ --with-version-opt="" \ +%ifnarch loongarch64 ppc64le --with-vendor-version-string="%{vendor_version_string}" \ --with-vendor-name="Bisheng" \ +%endif --with-vendor-url="https://openeuler.org/" \ --with-vendor-bug-url="https://gitee.com/src-openeuler/openjdk-11/issues/" \ --with-vendor-vm-bug-url="https://gitee.com/src-openeuler/openjdk-11/issues/" \ @@ -1395,6 +1419,7 @@ done # javaCalls.cpp:58 should map to: # http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/ff3b27e6bcc2/src/share/vm/runtime/javaCalls.cpp#l58 # Using line number 1 might cause build problems. See: +%ifnarch loongarch64 gdb -q "$JAVA_HOME/bin/java" < - 1:11.0.23.9-0 +- modified 8224675-Late-GC-barrier-insertion-for-ZGC.patch +- modified delete_expired_certificates.patch + +* Wed Mar 13 2024 jiahua.yu - 1:11.0.22.7-3 +- init support for arch ppc64le + +* Mon Feb 26 2024 misaka00251 - 1:11.0.22.7-2 +- Fix build on riscv64 + +* Tue Feb 20 2024 Leslie Zhai - 1:11.0.22.7-1 +- init support of LoongArch64 + * Wed Jan 17 2024 DXwangg - 1:11.0.22.7-0 - update to 11.0.22+7(GA) - modified delete_expired_certificates.patch
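For reference, the long-call sequence added above in LoongArch64TestAssembler.emitCall() forms an absolute target out of three immediates: lu12i_w carries bits 31..12, lu32i_d carries bits 51..32, and the jirl offset carries bits 11..0. The following is a minimal, self-contained sketch of that masking arithmetic only; it is not part of the patch, the class name and example address are illustrative, and the sign-extension and instruction-encoding details handled by the real assembler are not modeled.

public class Li52SplitSketch {
    public static void main(String[] args) {
        long addr = 0x00007f123456789cL;             // hypothetical code address (fits in 52 bits)

        int low12  = (int) (addr & 0xfff);           // jirl offset field
        int mid20  = (int) ((addr >> 12) & 0xfffff); // lu12i_w immediate
        int high20 = (int) ((addr >> 32) & 0xfffff); // lu32i_d immediate

        // Together the three fields cover exactly bits 0..51 of the target.
        long rebuilt = ((long) high20 << 32) | ((long) mid20 << 12) | low12;
        if (rebuilt != (addr & 0xFFFFFFFFFFFFFL)) {
            throw new AssertionError("split/reassembly mismatch");
        }
        System.out.printf("lu12i_w=0x%05x lu32i_d=0x%05x jirl=0x%03x%n", mid20, high20, low12);
    }
}

The same 20+12+20-bit decomposition underlies the li52 sequence that emitLoadPointer() emits via emitLi52(); its fixed three-instruction footprint is what the "need to match patchable_li52 instruction sequence" comment refers to, since a constant-size sequence can be patched in place.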